How to use raft algorithm in etcd

Keywords: snapshot github

The Raft protocol is a consensus algorithm that solves the problem of keeping data consistent across multiple machines. Raft is presented as a concise replacement for the far more complex Paxos algorithm. However, after reading the Raft paper you will find that, however concise and clear it claims to be, implementing Raft completely is still laborious.

etcd is a distributed key-value store that uses the Raft algorithm to guarantee data consistency across multiple machines. Can the Raft implementation in etcd be extracted and used in your own projects?

The answer is yes. etcd not only implements Raft, it also decouples the implementation cleanly so that it can be used independently. The code lives here: https://github.com/etcd-io/etcd/tree/master/raft.

The downside is that the usage documentation for etcd's raft package is poorly written: the code listed there is missing many key parts, so it cannot run as written. If you follow the documented code literally, you get an error, a Go panic, or a cluster that hangs and never holds an election. Through hands-on practice the author has filled in the missing code and completed a runnable example; see the code at the end of this article.

In practice, there are a few points that the documentation does not mention:

  1. The documentation says that n := raft.StartNode() is enough to start a node, but in practice this panics. You need to wrap the node in a struct of your own that implements the Process() method (see rNode in raft.go in this article).

  2. The documentation states that the n.Step() method should be called when an RPC message is received from another node in the cluster:

// recvRaftRPC is the receive hook as quoted from the etcd raft documentation:
// it hands the incoming message m to n.Step and discards Step's result.
func recvRaftRPC(ctx context.Context, m raftpb.Message) {
    n.Step(ctx, m)
}

But where is recvRaftRPC() called? Recall point 1: you have to wrap the node in a struct of your own. n.Step() should be called from the Process() method of that struct, not from a recvRaftRPC() function (see rNode in raft.go in this article). The raft transport calls Process() when it receives an RPC request from another node.

  3. Again regarding raft.StartNode(), the documentation shows this code:
n := raft.StartNode(c, []raft.Peer{{ID: 0x02}, {ID: 0x03}})

This describes a three-node cluster. If the ID of the node being started is 0x01, the documentation passes only 0x02 and 0x03 in the peer list at startup and leaves the node itself out. In fact, a cluster started this way hangs without ever holding an election. The correct approach is to include the node itself in the peer list.

  4. The for-select loop from the documentation must run in its own goroutine; otherwise the cluster hangs at startup without holding an election.

Sample code description

The sample code for this article is a three-node cluster in which raft messages are exchanged between the nodes over HTTP.

After the cluster starts, node 0x01 submits a proposal (that is, business data) once per second:

for {
    log.Printf("Propose on node %v\n", *id)
    n.node.Propose(context.TODO(), []byte("hello"))
    time.Sleep(time.Second)
}

Then at this point in the code:

for _, entry := range rd.CommittedEntries {
    switch entry.Type {
    case raftpb.EntryNormal:
       log.Printf("Receive committed data on node %v: %v\n", rn.id, string(entry.Data))
    ....
}

Every node in the cluster receives the committed proposal at this point; the data is now consistent across the cluster and can be safely persisted.

Full code:

main.go

package main

import (
	"context"
	"flag"
	"log"
	"time"
)

// main starts one member of a fixed three-node raft cluster. The member is
// selected with the -id flag. Node 1 additionally submits a "hello" proposal
// once per second after an initial five-second delay; every other node only
// participates in the cluster.
func main() {
	id := flag.Uint64("id", 1, "node id")
	flag.Parse()
	log.Printf("I'am node %v\n", *id)

	cluster := map[uint64]string{
		1: "http://127.0.0.1:22210",
		2: "http://127.0.0.1:22220",
		3: "http://127.0.0.1:22230",
	}
	// Reject unknown IDs up front: the node looks up its own listen address
	// in this map, so an ID outside it cannot be served.
	if _, ok := cluster[*id]; !ok {
		log.Fatalf("unknown node id %v: not a member of the cluster", *id)
	}
	n := newRaftNode(*id, cluster)

	if *id != 1 {
		// Nothing to propose on this node; park the main goroutine so the
		// background raft goroutines keep running.
		select {}
	}

	// Delay the first proposal before entering the loop.
	time.Sleep(5 * time.Second)
	for {
		log.Printf("Propose on node %v\n", *id)
		// Report rejected proposals instead of dropping the error silently.
		if err := n.node.Propose(context.TODO(), []byte("hello")); err != nil {
			log.Printf("Propose on node %v failed: %v\n", *id, err)
		}
		time.Sleep(time.Second)
	}
}

raft.go

package main

import (
	"context"
	"log"
	"net/http"
	"sort"
	"strconv"
	"strings"
	"time"

	"go.etcd.io/etcd/etcdserver/api/rafthttp"
	stats "go.etcd.io/etcd/etcdserver/api/v2stats"
	"go.etcd.io/etcd/pkg/types"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/raftpb"
	"go.uber.org/zap"
)

// rNode bundles everything one cluster member needs: its identity, the
// cluster address book, the raft node, the in-memory log storage backing it,
// and the HTTP transport used to talk to the other members. It is also the
// value handed to the transport as its Raft callback target.
type rNode struct {
	// id is this member's raft ID and its key in peerMap.
	id uint64

	// peerMap maps each member ID (this node included) to its URL.
	peerMap map[uint64]string

	// node is the raft node handle; it is assigned by startRaft.
	node raft.Node

	// raftStorage is the in-memory storage passed to raft via raft.Config.
	raftStorage *raft.MemoryStorage

	// transport carries raft messages between members; assigned by startRaft.
	transport *rafthttp.Transport
}

// newRaftNode creates the cluster member with the given id and starts it.
//
// peerMap maps every member ID (including id itself) to that member's URL.
// The returned node is fully initialised: its raft node and transport are
// already set, so callers may use them immediately.
func newRaftNode(id uint64, peerMap map[uint64]string) *rNode {
	n := &rNode{
		id:          id,
		peerMap:     peerMap,
		raftStorage: raft.NewMemoryStorage(),
	}
	// Run startRaft synchronously. It assigns n.node and n.transport, and
	// running it in a separate goroutine would race with callers that read
	// those fields (possibly while they are still nil). The long-running
	// loops are launched as goroutines inside startRaft itself.
	n.startRaft()
	return n
}

// startRaft bootstraps the raft node with the full member list, starts the
// HTTP transport, registers every remote member with it, and launches the
// HTTP server and the raft event loop in background goroutines.
//
// It terminates the process if the transport cannot be started.
func (rn *rNode) startRaft() {
	// Build the peer list (this node included) in ascending ID order. Map
	// iteration order is random, and the bootstrap entries derived from this
	// list should be identical on every member.
	peers := make([]raft.Peer, 0, len(rn.peerMap))
	for id := range rn.peerMap {
		peers = append(peers, raft.Peer{ID: id})
	}
	sort.Slice(peers, func(i, j int) bool { return peers[i].ID < peers[j].ID })

	c := &raft.Config{
		ID:              rn.id,
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         rn.raftStorage,
		MaxSizePerMsg:   4096,
		MaxInflightMsgs: 256,
	}
	rn.node = raft.StartNode(c, peers)

	rn.transport = &rafthttp.Transport{
		Logger:      zap.NewExample(),
		ID:          types.ID(rn.id),
		ClusterID:   0x1000,
		Raft:        rn,
		ServerStats: stats.NewServerStats("", ""),
		// FormatUint keeps the full uint64 range; converting through int
		// would misrepresent IDs above the maximum int value.
		LeaderStats: stats.NewLeaderStats(strconv.FormatUint(rn.id, 10)),
		ErrorC:      make(chan error),
	}
	if err := rn.transport.Start(); err != nil {
		log.Fatalf("node %v: starting raft transport: %v", rn.id, err)
	}

	// Only remote members are registered as transport peers.
	for peer, addr := range rn.peerMap {
		if peer != rn.id {
			rn.transport.AddPeer(types.ID(peer), []string{addr})
		}
	}

	go rn.serveRaft()
	go rn.serveChannels()
}

// serveRaft runs the HTTP server that receives raft messages from the other
// members. The listen address is the ":port" suffix of this node's own URL
// in peerMap, so the server binds that port on all interfaces.
//
// It terminates the process if the URL has no ':' or if the server stops
// with anything other than http.ErrServerClosed; a member that cannot
// receive messages cannot take part in the cluster.
func (rn *rNode) serveRaft() {
	self := rn.peerMap[rn.id]
	colon := strings.LastIndex(self, ":")
	if colon < 0 {
		// Slicing with -1 would panic; report the bad configuration instead.
		log.Fatalf("node %v: peer URL %q has no port", rn.id, self)
	}

	server := http.Server{
		Addr:    self[colon:],
		Handler: rn.transport.Handler(),
	}
	if err := server.ListenAndServe(); err != http.ErrServerClosed {
		log.Fatalf("node %v: raft HTTP server on %q: %v", rn.id, server.Addr, err)
	}
}

// serveChannels is the raft event loop. It never returns normally.
//
// Every 100ms it ticks the raft node. For each Ready batch it stores the
// batch in raftStorage, sends the outgoing messages, applies the committed
// entries and then calls Advance. Any error on the transport error channel
// terminates the process.
func (rn *rNode) serveChannels() {
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			rn.node.Tick()
		case rd := <-rn.node.Ready():
			// Store first, then send, then apply; Advance only once the
			// whole batch has been handled.
			rn.persistReady(rd)
			rn.transport.Send(rd.Messages)
			rn.applyCommitted(rd.CommittedEntries)
			rn.node.Advance()
		case err := <-rn.transport.ErrorC:
			log.Fatal(err)
		}
	}
}

// persistReady stores the snapshot, hard state and new entries of rd in
// raftStorage, terminating the process on any storage error.
func (rn *rNode) persistReady(rd raft.Ready) {
	// The snapshot must go in before the entries: entries in the same batch
	// follow the snapshot, and applying the snapshot afterwards would discard
	// them again.
	if !raft.IsEmptySnap(rd.Snapshot) {
		if err := rn.raftStorage.ApplySnapshot(rd.Snapshot); err != nil {
			log.Fatalf("node %v: applying snapshot to storage: %v", rn.id, err)
		}
	}
	if !raft.IsEmptyHardState(rd.HardState) {
		if err := rn.raftStorage.SetHardState(rd.HardState); err != nil {
			log.Fatalf("node %v: storing hard state: %v", rn.id, err)
		}
	}
	if err := rn.raftStorage.Append(rd.Entries); err != nil {
		log.Fatalf("node %v: appending entries to storage: %v", rn.id, err)
	}
}

// applyCommitted handles the committed entries of one Ready batch: normal
// entries are logged, configuration changes are decoded and applied to the
// raft node.
func (rn *rNode) applyCommitted(entries []raftpb.Entry) {
	for _, entry := range entries {
		switch entry.Type {
		case raftpb.EntryNormal:
			log.Printf("Receive committed data on node %v: %v\n", rn.id, string(entry.Data))
		case raftpb.EntryConfChange:
			var cc raftpb.ConfChange
			if err := cc.Unmarshal(entry.Data); err != nil {
				// Applying a half-decoded change would corrupt the membership.
				log.Fatalf("node %v: decoding conf change at index %d: %v", rn.id, entry.Index, err)
			}
			rn.node.ApplyConfChange(cc)
		}
	}
}

// Process feeds a message received from another member into the local raft
// node via Step and returns whatever error Step reports. rNode is installed
// as the transport's Raft target, which is how incoming messages reach it.
func (rn *rNode) Process(ctx context.Context, m raftpb.Message) error {
	err := rn.node.Step(ctx, m)
	return err
}
// IsIDRemoved always answers false: this example never treats any member ID
// as removed.
func (rn *rNode) IsIDRemoved(id uint64) bool {
	return false
}
// ReportUnreachable is intentionally a no-op; this example takes no action
// when a member is reported unreachable.
func (rn *rNode) ReportUnreachable(id uint64) {
}
// ReportSnapshot is intentionally a no-op; this example ignores the reported
// snapshot status.
func (rn *rNode) ReportSnapshot(id uint64, status raft.SnapshotStatus) {
}

43 original articles published. 50% praised. 680,000 visits+
Private letter follow

Posted by doozerdc on Fri, 17 Jan 2020 18:07:54 -0800