Flink dynamic broadcast state

Keywords: Apache Flink, Scala

Notes on Flink's dynamic broadcast state.

1. Overview

Flink jobs often need matching rules that change at runtime, and we cannot redeploy the job every time a rule is modified. Flink's broadcast state mechanism lets a running job pick up rule changes in real time.

2. Example

In this example we add a dynamic rule check: based on rule changes received at runtime, the job decides whether each incoming message needs an external storage verification.
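The rule messages on the broadcast socket follow a colon-separated layout, jobName:flagName:switchValue, for example:

job1:storageflag:yes
job1:storageflag:no

Only lines containing job1 are kept, and only the third field (the switch value) is broadcast to the data stream.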

Code:

package com.kn.broadcast

import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

object Broadcast01 {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Broadcast stream carrying the rules. Messages look like
    // "job1:storageflag:yes" (jobName:flagName:switchValue).
    val broadcastConfig = env.socketTextStream("localhost", 9001)
      .filter(_.contains("job1"))
      .map(_.split(":")(2)) // keep only the switch value, e.g. "yes"
      .broadcast(new MapStateDescriptor[String, String]("configFilter",
        BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO))

    // Data stream, connected with the broadcast stream
    env.socketTextStream("localhost", 9000)
      .connect(broadcastConfig)
      .process(new BroadcastProcessFunction[String, String, String] {
        // Current interception rule, updated whenever a broadcast element arrives
        var flag: String = _

        override def open(parameters: Configuration): Unit = {
          println("open function")
          flag = "no" // default: no storage check until a rule arrives
          super.open(parameters)
        }

        override def processElement(value: String,
                                    ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext,
                                    out: Collector[String]): Unit = {
          println("processElement function: " + value)
          if (flag == "yes") {
            out.collect("The storage status needs to be checked. Received message: " + value + " flag: " + flag)
          } else {
            out.collect("There is no need to check the storage status. Received message: " + value + " flag: " + flag)
          }
        }

        // Called once per parallel instance whenever a new element arrives on the broadcast stream
        override def processBroadcastElement(value: String,
                                             ctx: BroadcastProcessFunction[String, String, String]#Context,
                                             out: Collector[String]): Unit = {
          flag = value
          println("processBroadcastElement function")
        }
      }).print()

    env.execute("flink broadcast job")
  }
}
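To try it out locally, start two netcat listeners before launching the job, then type rule messages on port 9001 and data messages on port 9000:

nc -lk 9001    # broadcast rules, e.g. job1:storageflag:yes
nc -lk 9000    # data stream, arbitrary text lines

Note that this example keeps the current rule in a plain instance variable, so the MapStateDescriptor passed to broadcast() is never actually read and the flag is not part of checkpointed state. A minimal sketch of Flink's documented broadcast-state pattern, reusing the same configFilter descriptor (the "storageflag" key is an arbitrary choice for illustration):

// Shared descriptor, identical to the one passed to .broadcast(...)
val configDescriptor = new MapStateDescriptor[String, String]("configFilter",
  BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)

// In processBroadcastElement: write the new rule into the broadcast state
override def processBroadcastElement(value: String,
                                     ctx: BroadcastProcessFunction[String, String, String]#Context,
                                     out: Collector[String]): Unit = {
  ctx.getBroadcastState(configDescriptor).put("storageflag", value)
}

// In processElement: read the current rule from the read-only broadcast state
override def processElement(value: String,
                            ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext,
                            out: Collector[String]): Unit = {
  val flag = ctx.getBroadcastState(configDescriptor).get("storageflag") // null until a rule arrives
  if (flag == "yes") out.collect("The storage status needs to be checked: " + value)
  else out.collect("There is no need to check the storage status: " + value)
}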

Posted by spectsteve7 on Sat, 02 Nov 2019 17:44:14 -0700