|
- package cluster
-
- import (
- "crypto/tls"
- "encoding/binary"
- "fmt"
- "os"
- "path/filepath"
- "sync"
- "time"
-
- "github.com/hashicorp/raft"
- raftboltdb "github.com/hashicorp/raft-boltdb"
- "github.com/samber/lo"
- "gitlink.org.cn/cloudream/common/pkgs/async"
- "gitlink.org.cn/cloudream/common/pkgs/logger"
- "gitlink.org.cn/cloudream/jcs-pub/common/ecode"
- "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/rpc"
- clirpc "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/rpc/client"
- "google.golang.org/grpc"
- "google.golang.org/grpc/credentials"
- )
-
- type Cluster struct {
- cfg Config
- poolCfg clirpc.PoolConfig
- masterCli MasterClient
- doneCh chan any
- raft *raft.Raft
- fsm *raftFSM
- transport *Transport
- }
-
- func New(cfg Config) *Cluster {
- return &Cluster{
- cfg: cfg,
- masterCli: MasterClient{
- Client: clirpc.NewFusedClient(rpc.Failed(ecode.OperationFailed, "master unknown")),
- },
- doneCh: make(chan any, 1),
- }
- }
-
- func (c *Cluster) Start(fsms []FSM) (*ClusterEventChan, error) {
- log := logger.WithField("Mod", "Cluster")
-
- ch := async.NewUnboundChannel[ClusterEvent]()
- if !c.cfg.Enabled {
- log.Infof("cluster disabled")
- return ch, nil
- }
-
- c.fsm = NewFSM(fsms)
-
- poolCfgJSON := clirpc.PoolConfigJSON{
- RootCA: c.cfg.RootCA,
- ClientCert: c.cfg.ClientCert,
- ClientKey: c.cfg.ClientKey,
- }
-
- poolCfg, err := poolCfgJSON.Build()
- if err != nil {
- return nil, fmt.Errorf("build pool config: %w", err)
- }
- c.poolCfg = *poolCfg
-
- raftCfg := raft.DefaultConfig()
- raftCfg.LocalID = raft.ServerID(c.cfg.Announce)
-
- err = os.MkdirAll(c.cfg.StoreBase, 0755)
- if err != nil {
- return nil, fmt.Errorf("create store base dir: %w", err)
- }
-
- logDB, err := raftboltdb.NewBoltStore(filepath.Join(c.cfg.StoreBase, "log.db"))
- if err != nil {
- return nil, fmt.Errorf("create raft log store: %w", err)
- }
-
- stableDB, err := raftboltdb.NewBoltStore(filepath.Join(c.cfg.StoreBase, "stable.db"))
- if err != nil {
- return nil, fmt.Errorf("create raft stable store: %w", err)
- }
-
- snapshotStore, err := raft.NewFileSnapshotStore(c.cfg.StoreBase, 1, os.Stdout)
- if err != nil {
- return nil, fmt.Errorf("create raft snapshot store: %w", err)
- }
-
- c.transport = NewTransport(c.cfg.Announce, *poolCfg)
-
- rft, err := raft.NewRaft(raftCfg, c.fsm, logDB, stableDB, snapshotStore, c.transport)
- if err != nil {
- return nil, fmt.Errorf("create raft: %w", err)
- }
-
- hasState, err := raft.HasExistingState(logDB, stableDB, snapshotStore)
- if err != nil {
- return nil, fmt.Errorf("check has existing state: %w", err)
- }
-
- if !hasState {
- bootCfg := raft.Configuration{}
-
- if !lo.Contains(c.cfg.Peers, c.cfg.Announce) {
- bootCfg.Servers = append(bootCfg.Servers, raft.Server{
- ID: raft.ServerID(c.cfg.Announce),
- Address: raft.ServerAddress(c.cfg.Announce),
- })
- }
- for _, peer := range c.cfg.Peers {
- bootCfg.Servers = append(bootCfg.Servers, raft.Server{
- ID: raft.ServerID(peer),
- Address: raft.ServerAddress(peer),
- })
- }
-
- bootFut := rft.BootstrapCluster(bootCfg)
- if err := bootFut.Error(); err != nil {
- return nil, fmt.Errorf("bootstrap cluster: %w", err)
- }
-
- log.Infof("bootstrap new cluster")
- } else {
- log.Infof("start existing cluster")
- }
-
- ch.Send(&BootstrapEvent{})
- c.raft = rft
-
- eventCh := make(chan raft.Observation, 1)
-
- obs := raft.NewObserver(eventCh, true, nil)
- rft.RegisterObserver(obs)
-
- go func() {
- loop:
- for {
- select {
- case <-c.doneCh:
- break loop
-
- case e := <-eventCh:
- state, ok := e.Data.(raft.RaftState)
- if !ok {
- continue
- }
-
- switch state {
- case raft.Leader:
- log.Info("become leader")
-
- ch.Send(&LeaderEvent{
- CurrentIsMaster: true,
- Address: c.cfg.Announce,
- })
-
- case raft.Follower:
- addr, id := rft.LeaderWithID()
-
- log.Infof("become follower, master is: %v, %v", id, addr)
-
- ch.Send(&LeaderEvent{
- CurrentIsMaster: false,
- Address: string(addr),
- })
-
- case raft.Candidate:
- log.Info("become candidate")
- }
-
- }
- }
-
- c.raft.DeregisterObserver(obs)
- }()
-
- return ch, nil
- }
-
- func (c *Cluster) Stop() {
- c.raft.Shutdown().Error()
-
- select {
- case c.doneCh <- nil:
- default:
- }
- }
-
- func (c *Cluster) ID() string {
- return c.cfg.Announce
- }
-
- func (c *Cluster) IsMaster() bool {
- addr, _ := c.raft.LeaderWithID()
- return string(addr) == c.cfg.Announce
- }
-
- func (c *Cluster) Enabled() bool {
- return c.cfg.Enabled
- }
-
- func (c *Cluster) Name() string {
- return c.cfg.NodeName
- }
-
- // 由于主节点可能会变化,因此不要缓存MasterClient,每次都重新获取
- func (c *Cluster) MasterClient() *MasterClient {
- addr, _ := c.raft.LeaderWithID()
-
- c.masterCli.lock.Lock()
- defer c.masterCli.lock.Unlock()
-
- addr2 := string(addr)
- if addr2 == "" {
- if c.masterCli.con != nil {
- c.masterCli.con.Close()
- }
-
- c.masterCli.Client = clirpc.NewFusedClient(rpc.Failed(ecode.ClusterNoMaster, "no master"))
- return &c.masterCli
- }
-
- if c.masterCli.addr != addr2 {
- if c.masterCli.con != nil {
- c.masterCli.con.Close()
- }
-
- gcon, err := grpc.NewClient(addr2, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{
- RootCAs: c.poolCfg.Conn.RootCA,
- Certificates: []tls.Certificate{*c.poolCfg.Conn.ClientCert},
- ServerName: rpc.InternalAPISNIV1,
- NextProtos: []string{"h2"},
- })))
- if err != nil {
- c.masterCli.Client = clirpc.NewFusedClient(rpc.Failed(ecode.OperationFailed, "%v", err))
- c.masterCli.addr = ""
- } else {
- c.masterCli.con = gcon
- c.masterCli.Client = clirpc.NewClient(gcon)
- c.masterCli.addr = addr2
- }
- }
-
- return &c.masterCli
- }
-
- func (c *Cluster) RaftTransport() *Transport {
- return c.transport
- }
-
- // 只有Leader才能调用
- func (c *Cluster) Apply(fsmID string, data []byte, timeout time.Duration) ([]byte, error) {
- fsmIDBytes := []byte(fsmID)
-
- logBytes := make([]byte, 4+len(fsmIDBytes)+len(data))
-
- // 前4个字节表示ID的长度,后面跟着ID和数据
- binary.LittleEndian.PutUint32(logBytes[:4], uint32(len(fsmIDBytes)))
- copy(logBytes[4:], fsmIDBytes)
- copy(logBytes[4+len(fsmIDBytes):], data)
-
- fut := c.raft.Apply(logBytes, timeout)
- err := fut.Error()
- if err != nil {
- return nil, err
- }
-
- applyRet := fut.Response().(applyResult)
- return applyRet.Result, applyRet.Error
- }
-
// ClusterEvent is the common interface of all events published on a
// ClusterEventChan.
type ClusterEvent interface {
	// IsClusterEvent tags the implementing type as a cluster event.
	IsClusterEvent() bool
}
-
- type ClusterEventChan = async.UnboundChannel[ClusterEvent]
-
// ExitEvent is a ClusterEvent that carries an error in Err.
// NOTE(review): presumably signals that the cluster stopped; no code visible
// in this file emits it — confirm against the callers.
type ExitEvent struct {
	Err error
}

// IsClusterEvent marks ExitEvent as a ClusterEvent; it always returns true.
func (*ExitEvent) IsClusterEvent() bool { return true }
-
// BootstrapEvent is the ClusterEvent Start sends once the raft node has been
// created and, if needed, bootstrapped.
type BootstrapEvent struct{}

// IsClusterEvent marks BootstrapEvent as a ClusterEvent; it always returns true.
func (*BootstrapEvent) IsClusterEvent() bool { return true }
-
// LeaderEvent is the ClusterEvent sent when this node turns into the raft
// leader or into a follower.
type LeaderEvent struct {
	// CurrentIsMaster is true when the local node became the leader.
	CurrentIsMaster bool
	// Address is the master's address: the local announce address when this
	// node leads, otherwise the leader address reported by raft.
	Address string
}

// IsClusterEvent marks LeaderEvent as a ClusterEvent; it always returns true.
func (*LeaderEvent) IsClusterEvent() bool { return true }
-
- type MasterClient struct {
- *clirpc.Client
- con *grpc.ClientConn
- addr string
- lock sync.Mutex
- }
-
- func (c *MasterClient) Release() {
-
- }
|