background
This follows on from the previous post: https://my.oschina.net/tuxpy/blog/1631953
Previously, a bidirectional stream was implemented with gRPC, with all streams kept in a sync.Map. If the chat room comes under heavy load, how do we scale out? Put simply: how do we do load balancing? I had earlier read an article ( https://segmentfault.com/a/1190000008672912 ) on implementing load balancing with etcd, so I implemented it along those lines.
problem
- How does grpc's service discovery work?
- How to make chat content sent by one client reach all the clients distributed across different nodes
- If every RPC method were simple request/response, there would be no problem — at worst a request currently being processed is lost. But an application like a chat room uses bidirectional streaming, so once a node goes down, every stream held on that server subsequently fails with a codes.Unavailable error
Solve
- Reference resources https://segmentfault.com/a/1190000008672912 . Server registers key, keepalive regularly, client watch
- Use etcd's put + watch to implement a remote channel: every service node watches PUT events on a shared key and invokes the corresponding operation based on the value (for example, broadcasting a message to all clients connected to itself). I also considered making each service a gRPC client that remotely calls the broadcast operation of every other node whenever it has something to broadcast — but then every node would have to maintain a list of all the other nodes, which is far too painful.
- Separate authentication from chat: authenticate first, obtain a token, then attach it to subsequent chat RPC calls. Token authentication uses sessions stored in etcd, so a reconnect after a dropped connection only affects the chat stream and does not require logging in again.
Realization
service
/* * * Author : tuxpy * Email : q8886888@qq.com.com * Create time : 3/7/18 9:18 AM * Filename : service.go * Description : * * */ package main import ( "bytes" "context" "crypto/rand" "encoding/gob" "encoding/hex" "encoding/json" "flag" "fmt" grpclb "grpclb/etcdv3" pb "grpclb/helloword" "io" "log" "net" "os" "os/signal" "strings" "sync" "syscall" "time" "utils" "github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/mvcc/mvccpb" "github.com/golang/protobuf/ptypes/timestamp" "github.com/pkg/errors" "google.golang.org/grpc" "google.golang.org/grpc/metadata" "google.golang.org/grpc/peer" ) type Service struct{} type ConnectPool struct { sync.Map } type RemoteCommand struct { Command string Args map[string]string } type RemoteChannel struct { In chan RemoteCommand Out chan RemoteCommand cli *clientv3.Client } type SessionManager struct { cli *clientv3.Client } type Session struct { Name string Token string } var connect_pool *ConnectPool var remote_channel *RemoteChannel var session_manger *SessionManager // Broadcasting messages to other service s. Because of load balancing, a client stream may fall on different nodes, requiring broadcasting behavior to all nodes. 
func ReadyBroadCast(from, message string) { remote_channel.Out <- RemoteCommand{ Command: "broadcast", Args: map[string]string{ "from": from, "message": message, }, } } func (sm *SessionManager) Get(token string) (*Session, error) { key := fmt.Sprintf("%s/%s/session/%s", grpclb.Prefix, *srv, token) resp, err := sm.cli.Get(context.Background(), key) if err != nil { return nil, err } kv := resp.Kvs[0] session := &Session{} err = json.Unmarshal(kv.Value, session) if err != nil { return nil, errors.Wrap(err, "failed to unmarshal session data") } _, err = sm.cli.KeepAliveOnce(context.Background(), clientv3.LeaseID(kv.Lease)) utils.CheckErrorPanic(err) return session, nil } func (sm *SessionManager) GetFromContext(ctx context.Context) (*Session, error) { md, _ := metadata.FromIncomingContext(ctx) tokens := md["token"] if len(tokens) == 0 { return nil, errors.New("Miss token") } return sm.Get(tokens[0]) } func (sm *SessionManager) New(name string) (*Session, error) { buf := make([]byte, 16) io.ReadFull(rand.Reader, buf) token := hex.EncodeToString(buf) key := fmt.Sprintf("%s/%s/session/%s", grpclb.Prefix, *srv, token) grant, err := sm.cli.Grant(context.Background(), 60*5) // token is valid for 5 minutes if err != nil { return nil, errors.Wrap(err, "grant etcd lease ") } session := &Session{ Name: name, Token: token, } buf, err = json.Marshal(session) _, err = sm.cli.Put(context.Background(), key, string(buf), clientv3.WithLease(grant.ID)) if err != nil { return nil, errors.Wrap(err, "etcd3 put") } return session, nil } func (p *ConnectPool) Get(name string) pb.Greeter_SayHelloServer { if stream, ok := p.Load(name); ok { return stream.(pb.Greeter_SayHelloServer) } else { return nil } } func (p *ConnectPool) Add(name string, stream pb.Greeter_SayHelloServer) { p.Store(name, stream) } func (p *ConnectPool) Del(name string) { p.Delete(name) } func (p *ConnectPool) BroadCast(from, message string) { log.Printf("BroadCast from: %s, message: %s\n", from, message) 
p.Range(func(username_i, stream_i interface{}) bool { username := username_i.(string) stream := stream_i.(pb.Greeter_SayHelloServer) if username == from { return true } else { log.Printf("From %s to %s\n", from, username) utils.CheckErrorPanic(stream.Send(&pb.HelloReply{ Message: message, MessageType: pb.HelloReply_NORMAL_MESSAGE, TS: ×tamp.Timestamp{Seconds: time.Now().Unix()}, })) } return true }) } func (s *Service) Login(ctx context.Context, in *pb.LoginRequest) (*pb.LoginReply, error) { if connect_pool.Get(in.GetUsername()) != nil { return nil, errors.Errorf("username %s already exists", in.GetUsername()) } session, err := session_manger.New(in.GetUsername()) if err != nil { return nil, err } ReadyBroadCast(in.GetUsername(), fmt.Sprintf("Welcome %s!", in.GetUsername())) return &pb.LoginReply{ Token: session.Token, Success: true, Message: "success", }, nil } func (s *Service) SayHello(stream pb.Greeter_SayHelloServer) error { var ( session *Session err error ) peer, _ := peer.FromContext(stream.Context()) log.Printf("Received new connection. 
%s", peer.Addr.String()) session, err = session_manger.GetFromContext(stream.Context()) // With token in context, take out session if err != nil { stream.Send(&pb.HelloReply{ Message: err.Error(), MessageType: pb.HelloReply_CONNECT_FAILED, }) return nil } username := session.Name connect_pool.Add(username, stream) stream.Send(&pb.HelloReply{ Message: fmt.Sprintf("Connect success!"), MessageType: pb.HelloReply_CONNECT_SUCCESS, }) // Send a reminder of successful connection go func() { <-stream.Context().Done() connect_pool.Del(username) // When the user leaves the chat room, delete it from the connection pool ReadyBroadCast(username, fmt.Sprintf("%s leval room", username)) }() for { req, err := stream.Recv() if err != nil { return err } ReadyBroadCast(username, fmt.Sprintf("%s: %s", username, req.Message)) } return nil } var ( srv = flag.String("service", "chat_service", "service name") port = flag.Int("port", 8880, "listening port") reg = flag.String("reg", "http://127.0.0.1:2479", "register etcd address") ) func GetListen() string { return fmt.Sprintf("0.0.0.0:%d", *port) } func NewSessionManager(cli *clientv3.Client) *SessionManager { return &SessionManager{ cli: cli, } } // Accept operation behavior from other nodes by using watch of etcd3 func NewRemoteChannel(cli *clientv3.Client) *RemoteChannel { qc := &RemoteChannel{ cli: cli, In: make(chan RemoteCommand, 1), Out: make(chan RemoteCommand, 1), } go func() { var command RemoteCommand var channel string = fmt.Sprintf("%s/%s/channel", grpclb.Prefix, *srv) var buf bytes.Buffer var err error var dec *gob.Decoder rch := qc.cli.Watch(context.Background(), channel) for wresp := range rch { for _, ev := range wresp.Events { buf.Reset() dec = gob.NewDecoder(&buf) switch ev.Type { case mvccpb.PUT: buf.Write(ev.Kv.Value) err = dec.Decode(&command) if err != nil { log.Printf("recv an error message. 
%s\n", err.Error()) } else { qc.In <- command } } } } }() go func() { var command RemoteCommand var channel string = fmt.Sprintf("%s/%s/channel", grpclb.Prefix, *srv) var buf bytes.Buffer var enc *gob.Encoder for { buf.Reset() enc = gob.NewEncoder(&buf) command = <-qc.Out utils.CheckErrorPanic(enc.Encode(command)) qc.cli.Put(context.Background(), channel, buf.String()) } }() return qc } func NewEtcd3Client() (*clientv3.Client, error) { cli, err := clientv3.New(clientv3.Config{ Endpoints: strings.Split(*reg, ","), }) if err != nil { return nil, errors.Wrap(err, fmt.Sprintf("Create etcd3 client failed: %s", err.Error())) } return cli, nil } func main() { var err error flag.Parse() connect_pool = &ConnectPool{} etcd_cli, err := NewEtcd3Client() utils.CheckErrorPanic(err) remote_channel = NewRemoteChannel(etcd_cli) session_manger = NewSessionManager(etcd_cli) go func() { var command RemoteCommand for command = range remote_channel.In { switch command.Command { case "broadcast": connect_pool.BroadCast(command.Args["from"], command.Args["message"]) } } }() lis, err := net.Listen("tcp", GetListen()) utils.CheckErrorPanic(err) fmt.Println("Listen on", GetListen()) err = grpclb.Register(*srv, "127.0.0.1", *port, *reg, time.Second*3, 15) // Register the current node to etcd utils.CheckErrorPanic(err) ch := make(chan os.Signal) signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP, syscall.SIGQUIT) go func() { s := <-ch log.Printf("receive signal '%v'\n", s) grpclb.UnRegister() // After the program has been withdrawn, take the initiative to unregister signal.Stop(ch) switch s := s.(type) { case syscall.Signal: syscall.Kill(os.Getpid(), s) default: os.Exit(1) } }() s := grpc.NewServer(grpc.RPCCompressor(grpc.NewGZIPCompressor()), grpc.RPCDecompressor(grpc.NewGZIPDecompressor())) pb.RegisterGreeterServer(s, &Service{}) utils.CheckErrorPanic(s.Serve(lis)) }
client
package main import ( "bufio" "context" "flag" "fmt" grpclb "grpclb/etcdv3" "io" "log" "os" "sync" "time" "utils" "github.com/pkg/errors" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" pb "grpclb/helloword" ) var name *string = flag.String("name", "guess", "what's your name?") var reg *string = flag.String("reg", "http://127.0.0.1:2479", "register etcd address") var serv *string = flag.String("service", "chat_service", "service name") var mutex sync.Mutex func ConsoleLog(message string) { mutex.Lock() defer mutex.Unlock() fmt.Printf("\n------ %s -----\n%s\n> ", time.Now(), message) } func Input(prompt string) string { fmt.Print(prompt) reader := bufio.NewReader(os.Stdin) line, _, err := reader.ReadLine() if err != nil { if err == io.EOF { return "" } else { panic(errors.Wrap(err, "Input")) } } return string(line) } type Robot struct { sync.Mutex conn *grpc.ClientConn client pb.GreeterClient chat_stream pb.Greeter_SayHelloClient ctx context.Context cancel context.CancelFunc token string } func (robot *Robot) Cancel() { robot.cancel() } func (robot *Robot) Done() <-chan struct{} { return robot.ctx.Done() } func (robot *Robot) Connect() error { robot.Lock() defer robot.Unlock() if robot.conn != nil { robot.conn.Close() } r := grpclb.NewResolver(*serv) lb := grpc.RoundRobin(r) ctx, cancel := context.WithCancel(context.Background()) robot.ctx = ctx robot.cancel = cancel conn, err := grpc.DialContext(ctx, *reg, grpc.WithInsecure(), grpc.WithDecompressor(grpc.NewGZIPDecompressor()), grpc.WithCompressor(grpc.NewGZIPCompressor()), grpc.WithBalancer(lb), grpc.WithBlock()) if err != nil { return errors.Wrap(err, "Client Connect") } client := pb.NewGreeterClient(conn) robot.conn = conn robot.client = client robot.chat_stream = nil return nil } func (robot *Robot) GetChatStream() pb.Greeter_SayHelloClient { robot.Lock() defer robot.Unlock() if robot.chat_stream != nil { return robot.chat_stream } ctx := 
metadata.NewOutgoingContext(context.Background(), metadata.Pairs("token", robot.token)) for { stream, err := robot.client.SayHello(ctx) if err != nil { fmt.Printf("get chat stream failed. %s", err.Error()) time.Sleep(1 * time.Second) } else { robot.chat_stream = stream return robot.chat_stream } } return nil } func (robot *Robot) Login(username string) error { robot.Lock() defer robot.Unlock() reply, err := robot.client.Login(context.Background(), &pb.LoginRequest{ Username: username, }) if err != nil { return errors.Wrap(err, "Login") } robot.token = reply.GetToken() return nil } func NewRobot() *Robot { robot := &Robot{} utils.CheckErrorPanic(robot.Connect()) return robot } func main() { flag.Parse() robot := NewRobot() utils.CheckErrorPanic(robot.Login(*name)) ConsoleLog("Login successfully") // Monitor Server Notification go func() { var ( reply *pb.HelloReply err error ) for { reply, err = robot.GetChatStream().Recv() if err != nil && grpc.Code(err) == codes.Unavailable { ConsoleLog("Disconnected from the server, Retry") robot.Connect() ConsoleLog("Reconnect successfully") time.Sleep(time.Second) continue } utils.CheckErrorPanic(err) ConsoleLog(reply.Message) if reply.MessageType == pb.HelloReply_CONNECT_FAILED { log.Println("Connect failed.") robot.Cancel() break } } }() // Accept chat information and send chat content go func() { var ( line string err error ) for { line = Input("") if line == "exit" { robot.Cancel() break } err = robot.GetChatStream().Send(&pb.HelloRequest{ Message: line, }) fmt.Print("> ") if err != nil { ConsoleLog(fmt.Sprintf("there was error sending data. %s", err.Error())) continue } } }() <-robot.Done() fmt.Println("Bye") }
grpclb/etcdv3
For the concrete implementation, see the reference link above; I made only minor changes to it.