You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

core.go 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. package publock
  2. import (
  3. "fmt"
  4. "io"
  5. "sync"
  6. "time"
  7. "gitlink.org.cn/cloudream/common/pkgs/async"
  8. "gitlink.org.cn/cloudream/common/pkgs/logger"
  9. "gitlink.org.cn/cloudream/common/pkgs/trie"
  10. "gitlink.org.cn/cloudream/common/pkgs/types"
  11. "gitlink.org.cn/cloudream/common/utils/lo2"
  12. "gitlink.org.cn/cloudream/common/utils/serder"
  13. "gitlink.org.cn/cloudream/jcs-pub/client/internal/cluster"
  14. "gitlink.org.cn/cloudream/jcs-pub/client/internal/publock/lockprovider"
  15. pubtypes "gitlink.org.cn/cloudream/jcs-pub/client/internal/publock/types"
  16. "gitlink.org.cn/cloudream/jcs-pub/common/ecode"
  17. )
  18. type Core struct {
  19. cfg Config
  20. clster *cluster.Cluster
  21. fsm *ClusterFSM
  22. lock *sync.Mutex
  23. provdersTrie *trie.Trie[pubtypes.LockProvider]
  24. acquirings []*acquiring // 必须使用数组,因为要保证顺序(集群多个节点的执行结果应该严格相同)
  25. acquireds map[pubtypes.RequestID]*acquired
  26. eventCh *async.UnboundChannel[CoreEvent]
  27. doneCh chan any
  28. }
  29. func NewCore(cfg Config, clster *cluster.Cluster) *Core {
  30. c := &Core{
  31. cfg: cfg,
  32. clster: clster,
  33. lock: &sync.Mutex{},
  34. provdersTrie: trie.NewTrie[pubtypes.LockProvider](),
  35. acquireds: make(map[pubtypes.RequestID]*acquired),
  36. eventCh: async.NewUnboundChannel[CoreEvent](),
  37. doneCh: make(chan any, 1),
  38. }
  39. c.fsm = &ClusterFSM{
  40. core: c,
  41. }
  42. c.provdersTrie.Create([]any{lockprovider.UserSpaceLockPathPrefix, trie.WORD_ANY}).Value = lockprovider.NewUserSpaceLock()
  43. c.provdersTrie.Create([]any{lockprovider.PackageLockPathPrefix, trie.WORD_ANY}).Value = lockprovider.NewPackageLock()
  44. return c
  45. }
  46. func (s *Core) Apply(cmd Command) {
  47. switch cmd := cmd.(type) {
  48. case *Acquire:
  49. s.acquire(*cmd)
  50. case *AcquireTimeout:
  51. s.acquireTimeout(*cmd)
  52. case *Release:
  53. s.release(*cmd)
  54. case *LeaseExpired:
  55. s.leaseExpired(*cmd)
  56. case *Renew:
  57. s.renew(*cmd)
  58. }
  59. }
  60. func (s *Core) EventChan() *async.UnboundChannel[CoreEvent] {
  61. return s.eventCh
  62. }
  63. func (s *Core) FSM() cluster.FSM {
  64. return s.fsm
  65. }
  66. func (s *Core) Start() {
  67. log := logger.WithField("Mod", "Publock.Core")
  68. go func() {
  69. ticker := time.NewTicker(time.Second)
  70. defer ticker.Stop()
  71. loop:
  72. for {
  73. select {
  74. case <-s.doneCh:
  75. break loop
  76. case <-ticker.C:
  77. var acTimeout []pubtypes.RequestID
  78. var leExpired []pubtypes.RequestID
  79. s.lock.Lock()
  80. // 定时清理超时的锁请求
  81. for _, req := range s.acquirings {
  82. if time.Since(req.StartTime) < req.Cmd.Timeout {
  83. continue
  84. }
  85. acTimeout = append(acTimeout, req.Cmd.ID)
  86. }
  87. // 定时清理过期的锁
  88. for reqID, ac := range s.acquireds {
  89. if ac.ExpireCounter < s.cfg.LeaseExpiredSeconds {
  90. ac.ExpireCounter++
  91. continue
  92. }
  93. leExpired = append(leExpired, reqID)
  94. }
  95. s.lock.Unlock()
  96. for _, reqID := range acTimeout {
  97. cmd := AcquireTimeout{
  98. ID: reqID,
  99. }
  100. if s.clster != nil {
  101. data, err := serder.ObjectToJSONEx(cmd)
  102. if err != nil {
  103. log.Warnf("cmd %T to json: %v", cmd, err)
  104. continue
  105. }
  106. // 不管是否成功,有定时任务兜底
  107. s.clster.Apply(s.fsm.ID(), data, time.Second*3)
  108. } else {
  109. s.acquireTimeout(AcquireTimeout{
  110. ID: reqID,
  111. })
  112. }
  113. }
  114. for _, reqID := range leExpired {
  115. cmd := LeaseExpired{
  116. ID: reqID,
  117. }
  118. if s.clster != nil {
  119. data, err := serder.ObjectToJSONEx(cmd)
  120. if err != nil {
  121. log.Warnf("cmd %T to json: %v", cmd, err)
  122. continue
  123. }
  124. // 不管是否成功,有定时任务兜底
  125. s.clster.Apply(s.fsm.ID(), data, time.Second*3)
  126. } else {
  127. s.leaseExpired(LeaseExpired{
  128. ID: reqID,
  129. })
  130. }
  131. }
  132. }
  133. }
  134. }()
  135. }
  136. func (s *Core) Stop() {
  137. select {
  138. case s.doneCh <- nil:
  139. default:
  140. }
  141. s.eventCh.Close()
  142. }
  143. type acquiring struct {
  144. Cmd Acquire
  145. LastErr *ecode.CodeError
  146. // 这个值来自自每个节点自身,所以可能会各不相同。
  147. // 但这个值只是作为判断获取锁是否超时的依据,所以问题不大。
  148. StartTime time.Time
  149. }
  150. type acquired struct {
  151. ID pubtypes.RequestID
  152. Req pubtypes.LockRequest
  153. // 这个值用来记录锁经过的过期检查的次数,超过一定次数则认为过期。
  154. // 因为未通知加锁的服务而释放一个锁是一个危险操作,所以这里采用这种计数的方式来实现过期
  155. ExpireCounter int
  156. }
  157. func (c *Core) acquire(cmd Acquire) {
  158. c.lock.Lock()
  159. defer c.lock.Unlock()
  160. // 立刻检测锁是否可用
  161. cerr := c.tryAcquireOne(cmd.ID, cmd.Request)
  162. if cerr == nil {
  163. err := c.eventCh.Send(&AcquireResult{
  164. Raw: cmd,
  165. Error: nil,
  166. })
  167. if err != nil {
  168. panic(err)
  169. }
  170. return
  171. }
  172. // 不可用则加入等待列表
  173. info := &acquiring{
  174. Cmd: cmd,
  175. LastErr: cerr,
  176. StartTime: time.Now(),
  177. }
  178. c.acquirings = append(c.acquirings, info)
  179. go func() {
  180. log := logger.WithField("Mod", "Publock.Core")
  181. <-time.After(cmd.Timeout)
  182. ac := AcquireTimeout{
  183. ID: cmd.ID,
  184. }
  185. data, err := serder.ObjectToJSONEx(ac)
  186. if err != nil {
  187. log.Warnf("cmd %T to json: %v", cmd, err)
  188. return
  189. }
  190. // 不管是否成功,有定时任务兜底
  191. c.clster.Apply(c.fsm.ID(), data, cmd.Timeout)
  192. }()
  193. }
  194. func (s *Core) release(cmd Release) {
  195. reqID := cmd.ID
  196. s.lock.Lock()
  197. defer s.lock.Unlock()
  198. ac, ok := s.acquireds[reqID]
  199. if !ok {
  200. return
  201. }
  202. s.releaseRequest(reqID, ac.Req)
  203. s.eventCh.Send(&Released{
  204. ID: reqID,
  205. })
  206. s.tryAcquirings()
  207. }
  208. func (c *Core) acquireTimeout(cmd AcquireTimeout) {
  209. c.lock.Lock()
  210. defer c.lock.Unlock()
  211. for i, req := range c.acquirings {
  212. if req.Cmd.ID == cmd.ID {
  213. c.eventCh.Send(&AcquireResult{
  214. Raw: req.Cmd,
  215. Error: req.LastErr,
  216. })
  217. c.acquirings = lo2.RemoveAt(c.acquirings, i)
  218. return
  219. }
  220. }
  221. }
  222. func (c *Core) leaseExpired(cmd LeaseExpired) {
  223. log := logger.WithField("Mod", "Publock.Core")
  224. c.lock.Lock()
  225. defer c.lock.Unlock()
  226. ac, ok := c.acquireds[cmd.ID]
  227. if !ok {
  228. return
  229. }
  230. log.Warnf("lock request %v lease expired", ac.ID)
  231. c.releaseRequest(ac.ID, ac.Req)
  232. c.tryAcquirings()
  233. }
  234. func (c *Core) renew(cmd Renew) {
  235. c.lock.Lock()
  236. defer c.lock.Unlock()
  237. for _, reqID := range cmd.IDs {
  238. ac, ok := c.acquireds[reqID]
  239. if !ok {
  240. continue
  241. }
  242. ac.ExpireCounter = 0
  243. }
  244. }
  245. func (a *Core) tryAcquirings() {
  246. for i := 0; i < len(a.acquirings); i++ {
  247. req := a.acquirings[i]
  248. err := a.tryAcquireOne(req.Cmd.ID, req.Cmd.Request)
  249. if err != nil {
  250. req.LastErr = err
  251. continue
  252. }
  253. a.eventCh.Send(&AcquireResult{
  254. Raw: req.Cmd,
  255. Error: nil,
  256. })
  257. a.acquirings[i] = nil
  258. }
  259. a.acquirings = lo2.RemoveAllDefault(a.acquirings)
  260. }
  261. func (s *Core) tryAcquireOne(reqID pubtypes.RequestID, req pubtypes.LockRequest) *ecode.CodeError {
  262. cerr := s.testOneRequest(req)
  263. if cerr != nil {
  264. return cerr
  265. }
  266. s.applyRequest(reqID, req)
  267. s.acquireds[reqID] = &acquired{
  268. ID: reqID,
  269. Req: req,
  270. ExpireCounter: 0,
  271. }
  272. return nil
  273. }
  274. func (s *Core) testOneRequest(req pubtypes.LockRequest) *ecode.CodeError {
  275. for _, lock := range req.Locks {
  276. n, ok := s.provdersTrie.WalkEnd(lock.Path)
  277. if !ok || n.Value == nil {
  278. return ecode.Newf(ecode.DataNotFound, "lock provider not found for path %v", lock.Path)
  279. }
  280. err := n.Value.CanLock(lock)
  281. if err != nil {
  282. return ecode.Newf(ecode.LockConflict, "%v", err)
  283. }
  284. }
  285. return nil
  286. }
  287. func (s *Core) applyRequest(reqID pubtypes.RequestID, req pubtypes.LockRequest) {
  288. for _, lock := range req.Locks {
  289. p, _ := s.provdersTrie.WalkEnd(lock.Path)
  290. p.Value.Lock(reqID, lock)
  291. }
  292. }
  293. func (s *Core) releaseRequest(reqID pubtypes.RequestID, req pubtypes.LockRequest) {
  294. for _, lock := range req.Locks {
  295. p, _ := s.provdersTrie.WalkEnd(lock.Path)
  296. p.Value.Unlock(reqID, lock)
  297. }
  298. delete(s.acquireds, reqID)
  299. }
  300. type LockedRequest struct {
  301. Req pubtypes.LockRequest
  302. ReqID pubtypes.RequestID
  303. }
  // Command is implemented by every command that Core.Apply can dispatch.
  type Command interface {
  	// IsCommand marks the type as a command.
  	IsCommand() bool
  }
  307. var _ = serder.UseTypeUnionExternallyTagged(types.Ref(types.NewTypeUnion[Command](
  308. (*Acquire)(nil),
  309. (*Release)(nil),
  310. (*AcquireTimeout)(nil),
  311. (*LeaseExpired)(nil),
  312. (*Renew)(nil),
  313. )))
  314. type Acquire struct {
  315. ID pubtypes.RequestID
  316. Request pubtypes.LockRequest
  317. Timeout time.Duration
  318. Reason string
  319. }
  320. func (a *Acquire) IsCommand() bool {
  321. return true
  322. }
  323. type Release struct {
  324. ID pubtypes.RequestID
  325. }
  326. func (r *Release) IsCommand() bool {
  327. return true
  328. }
  329. type AcquireTimeout struct {
  330. ID pubtypes.RequestID
  331. }
  332. func (a *AcquireTimeout) IsCommand() bool {
  333. return true
  334. }
  335. type LeaseExpired struct {
  336. ID pubtypes.RequestID
  337. }
  338. func (l *LeaseExpired) IsCommand() bool {
  339. return true
  340. }
  341. type Renew struct {
  342. IDs []pubtypes.RequestID
  343. }
  344. func (r *Renew) IsCommand() bool {
  345. return true
  346. }
  // CoreEvent is implemented by every event published on Core's event channel.
  type CoreEvent interface {
  	// IsCoreEvent marks the type as a core event.
  	IsCoreEvent() bool
  }
  350. type AcquireResult struct {
  351. Raw Acquire
  352. Error *ecode.CodeError
  353. }
  354. func (a *AcquireResult) IsCoreEvent() bool {
  355. return true
  356. }
  357. type Released struct {
  358. ID pubtypes.RequestID
  359. }
  360. func (r *Released) IsCoreEvent() bool {
  361. return true
  362. }
  363. type ClusterFSM struct {
  364. core *Core
  365. }
  366. func (f *ClusterFSM) ID() string {
  367. return "Publock"
  368. }
  369. func (f *ClusterFSM) Apply(cmdData []byte) ([]byte, error) {
  370. cmd, err := serder.JSONToObjectEx[Command](cmdData)
  371. if err != nil {
  372. return nil, fmt.Errorf("parse command: %v", err)
  373. }
  374. f.core.Apply(cmd)
  375. return nil, nil
  376. }
  377. func (f *ClusterFSM) Snapshot() (cluster.FSMSnapshot, error) {
  378. log := logger.WithField("Mod", "Publock.ClusterFSM")
  379. log.Debugf("make snapshot")
  380. f.core.lock.Lock()
  381. defer f.core.lock.Unlock()
  382. acquireds := make([]*acquired, 0, len(f.core.acquireds))
  383. for _, ac := range f.core.acquireds {
  384. newAc := &acquired{
  385. ID: ac.ID,
  386. Req: ac.Req,
  387. ExpireCounter: ac.ExpireCounter,
  388. }
  389. acquireds = append(acquireds, newAc)
  390. }
  391. acquirings := make([]*acquiring, 0, len(f.core.acquirings))
  392. for _, ac := range f.core.acquirings {
  393. newAc := &acquiring{
  394. Cmd: ac.Cmd,
  395. LastErr: ac.LastErr,
  396. StartTime: ac.StartTime,
  397. }
  398. acquirings = append(acquirings, newAc)
  399. }
  400. return &FSMSnapshot{
  401. Acquireds: acquireds,
  402. Acquirings: acquirings,
  403. }, nil
  404. }
  405. func (f *ClusterFSM) Restore(input io.Reader) error {
  406. log := logger.WithField("Mod", "Publock.ClusterFSM")
  407. log.Debugf("restore from input")
  408. snap := &FSMSnapshot{}
  409. err := serder.JSONToObjectStream(input, snap)
  410. if err != nil {
  411. return err
  412. }
  413. f.core.lock.Lock()
  414. defer f.core.lock.Unlock()
  415. f.core.provdersTrie.Walk(nil, func(word string, wordIndex int, node *trie.Node[pubtypes.LockProvider], isWordNode bool) {
  416. if node.Value != nil {
  417. node.Value.Clear()
  418. }
  419. })
  420. f.core.acquireds = make(map[pubtypes.RequestID]*acquired)
  421. for _, a := range snap.Acquireds {
  422. f.core.applyRequest(a.ID, a.Req)
  423. }
  424. f.core.acquirings = snap.Acquirings
  425. for _, req := range f.core.acquirings {
  426. // 已经超时的请求不启动精确的定时任务
  427. if time.Since(req.StartTime) > req.Cmd.Timeout {
  428. continue
  429. }
  430. go func() {
  431. <-time.After(req.Cmd.Timeout - time.Since(req.StartTime))
  432. cmd := AcquireTimeout{
  433. ID: req.Cmd.ID,
  434. }
  435. data, err := serder.ObjectToJSONEx(cmd)
  436. if err != nil {
  437. log.Warnf("cmd %T to json: %v", cmd, err)
  438. return
  439. }
  440. // 不管是否成功,有定时任务兜底
  441. f.core.clster.Apply(f.core.fsm.ID(), data, req.Cmd.Timeout)
  442. }()
  443. }
  444. return nil
  445. }
  446. type FSMSnapshot struct {
  447. Acquireds []*acquired
  448. Acquirings []*acquiring
  449. }
  450. func (s *FSMSnapshot) Persist(output io.Writer) error {
  451. rc := serder.ObjectToJSONStream(s)
  452. defer rc.Close()
  453. _, err := io.Copy(output, rc)
  454. return err
  455. }
  456. func (s *FSMSnapshot) Release() {}

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。