Flink dynamic broadcast state notes
1. outline
Flink jobs often need matching rules that change while the job is running, and we cannot redeploy the job every time a rule is modified. Flink's broadcast state mechanism solves this: rule updates are sent on a broadcast stream and applied to the running job in real time.
2. example
In this example, a rule that can be changed at runtime decides whether each incoming message needs to be verified against external storage.
Code:
package com.kn.broadcast

import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

/**
 * Demo job for Flink's broadcast state pattern.
 *
 * Two socket streams are read:
 *  - `localhost:9001` carries rule updates of the form `job1:storageflag:yes`
 *    (job name : rule name : switch value);
 *  - `localhost:9000` carries the data messages.
 *
 * The most recent switch value is kept in broadcast state and decides, for every
 * data message, whether the storage status has to be checked.
 */
object Broadcast01 {

  /** Only rule lines containing this job name are applied. */
  private val JobName = "job1"

  /** Zero-based position of the switch value in a `:`-separated rule line. */
  private val FlagFieldIndex = 2

  /** Key under which the current switch value is stored in the broadcast state. */
  private val FlagKey = "storageFlag"

  /** Switch value used until the first rule update has been received. */
  private val DefaultFlag = "no"

  /** Descriptor shared by the broadcast stream and the process function. */
  private val ConfigDescriptor: MapStateDescriptor[String, String] =
    new MapStateDescriptor[String, String](
      "configFilter",
      BasicTypeInfo.STRING_TYPE_INFO,
      BasicTypeInfo.STRING_TYPE_INFO)

  /**
   * Builds and runs the job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Broadcast (rule) stream: keep only well-formed lines for this job and extract
    // the switch value. Lines with fewer than three fields are dropped instead of
    // failing the job with an ArrayIndexOutOfBoundsException.
    val broadcastConfig = env
      .socketTextStream("localhost", 9001)
      .filter(line => line.contains(JobName) && line.split(":").length > FlagFieldIndex)
      .map(_.split(":")(FlagFieldIndex))
      .broadcast(ConfigDescriptor)

    // Data stream, connected to the broadcast rule stream.
    env
      .socketTextStream("localhost", 9000)
      .connect(broadcastConfig)
      .process(new BroadcastProcessFunction[String, String, String] {

        override def open(parameters: Configuration): Unit = {
          println("open function")
          super.open(parameters)
        }

        /** Handles one data message using the switch value currently held in broadcast state. */
        override def processElement(
            value: String,
            ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext,
            out: Collector[String]): Unit = {
          println("processElement function: " + value)
          // The state lookup yields null until a rule has arrived; fall back to the default.
          val flag = Option(ctx.getBroadcastState(ConfigDescriptor).get(FlagKey)).getOrElse(DefaultFlag)
          if (flag == "yes") {
            out.collect(s"The storage status needs to be checked. The received messages are:$value flag:$flag")
          } else {
            out.collect(s"There is no need to check the storage status. The received message is:$value flag:$flag")
          }
        }

        /**
         * Applies a rule update. Runs only when the broadcast stream delivers a new
         * element, once in each parallel instance of this operator.
         */
        override def processBroadcastElement(
            value: String,
            ctx: BroadcastProcessFunction[String, String, String]#Context,
            out: Collector[String]): Unit = {
          // Keep the rule in broadcast state rather than in an instance field, so that
          // it is part of the operator's state when checkpointing is enabled.
          ctx.getBroadcastState(ConfigDescriptor).put(FlagKey, value)
          println("processBroadcastElement function")
        }
      })
      .print()

    env.execute("flink broadcast job")
  }
}