You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cluster.go 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. package cluster
  2. import (
  3. "crypto/tls"
  4. "fmt"
  5. "os"
  6. "path/filepath"
  7. "sync"
  8. "time"
  9. "github.com/hashicorp/raft"
  10. raftboltdb "github.com/hashicorp/raft-boltdb"
  11. "github.com/samber/lo"
  12. "gitlink.org.cn/cloudream/common/pkgs/async"
  13. "gitlink.org.cn/cloudream/common/pkgs/logger"
  14. "gitlink.org.cn/cloudream/jcs-pub/common/ecode"
  15. "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/rpc"
  16. clirpc "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/rpc/client"
  17. "google.golang.org/grpc"
  18. "google.golang.org/grpc/credentials"
  19. )
  20. type Cluster struct {
  21. cfg Config
  22. poolCfg clirpc.PoolConfig
  23. masterCli MasterClient
  24. raft *raft.Raft
  25. transport *Transport
  26. doneCh chan any
  27. }
  28. func New(cfg Config) *Cluster {
  29. return &Cluster{
  30. cfg: cfg,
  31. masterCli: MasterClient{
  32. Client: clirpc.NewFusedClient(rpc.Failed(ecode.OperationFailed, "master unknown")),
  33. },
  34. doneCh: make(chan any, 1),
  35. }
  36. }
  37. func (c *Cluster) Start() (*ClusterEventChan, error) {
  38. log := logger.WithField("Mod", "Cluster")
  39. ch := async.NewUnboundChannel[ClusterEvent]()
  40. if !c.cfg.Enabled {
  41. log.Infof("cluster disabled")
  42. return ch, nil
  43. }
  44. poolCfgJSON := clirpc.PoolConfigJSON{
  45. RootCA: c.cfg.RootCA,
  46. ClientCert: c.cfg.ClientCert,
  47. ClientKey: c.cfg.ClientKey,
  48. }
  49. poolCfg, err := poolCfgJSON.Build()
  50. if err != nil {
  51. return nil, fmt.Errorf("build pool config: %w", err)
  52. }
  53. c.poolCfg = *poolCfg
  54. raftCfg := raft.DefaultConfig()
  55. raftCfg.LocalID = raft.ServerID(c.cfg.Announce)
  56. err = os.MkdirAll(c.cfg.StoreBase, 0755)
  57. if err != nil {
  58. return nil, fmt.Errorf("create store base dir: %w", err)
  59. }
  60. logDB, err := raftboltdb.NewBoltStore(filepath.Join(c.cfg.StoreBase, "log.db"))
  61. if err != nil {
  62. return nil, fmt.Errorf("create raft log store: %w", err)
  63. }
  64. stableDB, err := raftboltdb.NewBoltStore(filepath.Join(c.cfg.StoreBase, "stable.db"))
  65. if err != nil {
  66. return nil, fmt.Errorf("create raft stable store: %w", err)
  67. }
  68. snapshotStore, err := raft.NewFileSnapshotStore(c.cfg.StoreBase, 1, os.Stdout)
  69. if err != nil {
  70. return nil, fmt.Errorf("create raft snapshot store: %w", err)
  71. }
  72. fsm := NewFSM()
  73. c.transport = NewTransport(c.cfg.Announce, *poolCfg)
  74. rft, err := raft.NewRaft(raftCfg, fsm, logDB, stableDB, snapshotStore, c.transport)
  75. if err != nil {
  76. return nil, fmt.Errorf("create raft: %w", err)
  77. }
  78. hasState, err := raft.HasExistingState(logDB, stableDB, snapshotStore)
  79. if err != nil {
  80. return nil, fmt.Errorf("check has existing state: %w", err)
  81. }
  82. if !hasState {
  83. bootCfg := raft.Configuration{}
  84. if !lo.Contains(c.cfg.Peers, c.cfg.Announce) {
  85. bootCfg.Servers = append(bootCfg.Servers, raft.Server{
  86. ID: raft.ServerID(c.cfg.Announce),
  87. Address: raft.ServerAddress(c.cfg.Announce),
  88. })
  89. }
  90. for _, peer := range c.cfg.Peers {
  91. bootCfg.Servers = append(bootCfg.Servers, raft.Server{
  92. ID: raft.ServerID(peer),
  93. Address: raft.ServerAddress(peer),
  94. })
  95. }
  96. bootFut := rft.BootstrapCluster(bootCfg)
  97. if err := bootFut.Error(); err != nil {
  98. return nil, fmt.Errorf("bootstrap cluster: %w", err)
  99. }
  100. log.Infof("bootstrap new cluster")
  101. } else {
  102. log.Infof("start existing cluster")
  103. }
  104. ch.Send(&BootstrapEvent{})
  105. c.raft = rft
  106. eventCh := make(chan raft.Observation, 1)
  107. obs := raft.NewObserver(eventCh, true, nil)
  108. rft.RegisterObserver(obs)
  109. go func() {
  110. loop:
  111. for {
  112. select {
  113. case <-c.doneCh:
  114. break loop
  115. case e := <-eventCh:
  116. state, ok := e.Data.(raft.RaftState)
  117. if !ok {
  118. continue
  119. }
  120. switch state {
  121. case raft.Leader:
  122. log.Info("become leader")
  123. ch.Send(&LeaderEvent{
  124. CurrentIsMaster: true,
  125. Address: c.cfg.Announce,
  126. })
  127. case raft.Follower:
  128. addr, id := rft.LeaderWithID()
  129. log.Infof("become follower, master is: %v, %v", id, addr)
  130. ch.Send(&LeaderEvent{
  131. CurrentIsMaster: false,
  132. Address: string(addr),
  133. })
  134. case raft.Candidate:
  135. log.Info("become candidate")
  136. }
  137. }
  138. }
  139. c.raft.DeregisterObserver(obs)
  140. }()
  141. return ch, nil
  142. }
  143. func (c *Cluster) Stop() {
  144. c.raft.Shutdown().Error()
  145. select {
  146. case c.doneCh <- nil:
  147. default:
  148. }
  149. }
  150. func (c *Cluster) ID() string {
  151. return c.cfg.Announce
  152. }
  153. func (c *Cluster) IsMaster() bool {
  154. addr, _ := c.raft.LeaderWithID()
  155. return string(addr) == c.cfg.Announce
  156. }
  157. func (c *Cluster) Enabled() bool {
  158. return c.cfg.Enabled
  159. }
  160. func (c *Cluster) Name() string {
  161. return c.cfg.NodeName
  162. }
  163. // 由于主节点可能会变化,因此不要缓存MasterClient,每次都重新获取
  164. func (c *Cluster) MasterClient() *MasterClient {
  165. addr, _ := c.raft.LeaderWithID()
  166. c.masterCli.lock.Lock()
  167. defer c.masterCli.lock.Unlock()
  168. addr2 := string(addr)
  169. if addr2 == "" {
  170. if c.masterCli.con != nil {
  171. c.masterCli.con.Close()
  172. }
  173. c.masterCli.Client = clirpc.NewFusedClient(rpc.Failed(ecode.ClusterNoMaster, "no master"))
  174. return &c.masterCli
  175. }
  176. if c.masterCli.addr != addr2 {
  177. if c.masterCli.con != nil {
  178. c.masterCli.con.Close()
  179. }
  180. gcon, err := grpc.NewClient(addr2, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{
  181. RootCAs: c.poolCfg.Conn.RootCA,
  182. Certificates: []tls.Certificate{*c.poolCfg.Conn.ClientCert},
  183. ServerName: rpc.InternalAPISNIV1,
  184. NextProtos: []string{"h2"},
  185. })))
  186. if err != nil {
  187. c.masterCli.Client = clirpc.NewFusedClient(rpc.Failed(ecode.OperationFailed, "%v", err))
  188. c.masterCli.addr = ""
  189. } else {
  190. c.masterCli.con = gcon
  191. c.masterCli.Client = clirpc.NewClient(gcon)
  192. c.masterCli.addr = addr2
  193. }
  194. }
  195. return &c.masterCli
  196. }
  197. func (c *Cluster) RaftTransport() *Transport {
  198. return c.transport
  199. }
  200. // 只有Leader才能调用
  201. func (c *Cluster) Apply(data string, timeout time.Duration) error {
  202. return c.raft.Apply([]byte(data), timeout).Error()
  203. }
  // ClusterEvent is implemented by every event type delivered on the channel
  // returned by Cluster.Start.
  type ClusterEvent interface {
  	// IsClusterEvent marks the implementing type as a cluster event.
  	IsClusterEvent() bool
  }
  207. type ClusterEventChan = async.UnboundChannel[ClusterEvent]
  // ExitEvent is a cluster event that carries an error in Err.
  type ExitEvent struct {
  	Err error
  }

  // IsClusterEvent marks ExitEvent as a ClusterEvent; it always returns true.
  func (ev *ExitEvent) IsClusterEvent() bool {
  	return true
  }
  // BootstrapEvent is sent by Cluster.Start once the raft node has been created
  // and, where needed, bootstrapped.
  type BootstrapEvent struct{}

  // IsClusterEvent marks BootstrapEvent as a ClusterEvent; it always returns true.
  func (ev *BootstrapEvent) IsClusterEvent() bool {
  	return true
  }
  // LeaderEvent is sent when the local raft node becomes leader or follower.
  type LeaderEvent struct {
  	// CurrentIsMaster is true when the local node has become the leader.
  	CurrentIsMaster bool
  	// Address is the leader's address as known when the event was created.
  	Address string
  }

  // IsClusterEvent marks LeaderEvent as a ClusterEvent; it always returns true.
  func (ev *LeaderEvent) IsClusterEvent() bool {
  	return true
  }
  225. type MasterClient struct {
  226. *clirpc.Client
  227. con *grpc.ClientConn
  228. addr string
  229. lock sync.Mutex
  230. }
  231. func (c *MasterClient) Release() {
  232. }

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。