Article directory
Preface
There are some examples on the Internet of managing Kafka offsets manually, but most of them target Kafka 0.8 and are written in Scala. Coverage of Kafka 0.10 is rare or incomplete. Version 0.10 is compatible with earlier versions, and its new API is noticeably more concise and easier to use. Since I could not find anything that worked out of the box, I took the time to read the official API documentation and wrote a more complete Java example. I hope it will be helpful to you.
pom Dependent Version
<dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_2.11</artifactId> <version>2.2.0</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming_2.11</artifactId> <version>2.2.0</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming-kafka-0-10_2.11</artifactId> <version>2.2.0</version> </dependency>
Demo
Without further ado, here is the code. Comments are sparse, but clear naming serves as documentation.
/**
 * Spark Streaming word count over Kafka 0.10 that manages consumer offsets
 * manually in ZooKeeper, so the stream can resume from the last stored
 * position after a restart instead of relying on Kafka auto-commit.
 */
public class WordCount {

    private static String groupId = "group1";
    // Offsets to resume from, loaded from ZooKeeper at startup: the value is
    // the NEXT offset to consume for each partition.
    private static Map<TopicPartition, Long> offsets = new HashMap<>();
    private static Collection<String> topics = Arrays.asList("mytest-topic", "dc01");
    private static String zkServerUrls = "dc-sit-225:2181";
    private static ZkClient zkClient = new ZkClient(zkServerUrls);
    // Root ZooKeeper path under which this consumer group's offsets are stored:
    // /consumers/offsets/<groupId>/<topic>/<partition> -> Long offset
    private static String rootPath = "/consumers/offsets/" + groupId;

    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("KafkaWordCount");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // Populate `offsets` from ZooKeeper before subscribing.
        initOffset(zkClient);

        JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                // Subscribe, seeking each partition to the offset recovered from ZooKeeper.
                ConsumerStrategies.<String, String>Subscribe(topics, initKafkaParams(), offsets)
        );

        // NOTE(review): updateOffset runs inside a transformation, i.e. on executors.
        // That is fine in local[*] mode, but on a real cluster the static zkClient
        // would not exist on the executor JVMs. The idiomatic approach is to commit
        // offsets once per batch in foreachRDD via HasOffsetRanges.
        JavaDStream<String> words = stream.flatMap(x -> updateOffset(x));

        // Count each word in each batch.
        JavaPairDStream<String, Integer> pairs = words.mapToPair(s -> new Tuple2<>(s, 1));
        JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey((i1, i2) -> i1 + i2);
        wordCounts.print();

        jssc.start();             // Start the computation
        jssc.awaitTermination();  // Wait for the computation to terminate
    }

    /**
     * Loads previously stored offsets from ZooKeeper into {@link #offsets}.
     * On first run (root path absent) it only creates the per-topic paths;
     * the offsets map stays empty and Kafka falls back to auto.offset.reset.
     *
     * @param zkClient connected ZooKeeper client
     */
    public static void initOffset(ZkClient zkClient) {
        if (!zkClient.exists(rootPath)) {
            for (String topic : topics) {
                String path = rootPath + "/" + topic;
                zkClient.createPersistent(path, true);
            }
        } else {
            List<String> topicSet = zkClient.getChildren(rootPath);
            for (String topic : topicSet) {
                String topicPath = rootPath + "/" + topic;
                List<String> partitionSet = zkClient.getChildren(topicPath);
                for (String partition : partitionSet) {
                    Long offset = zkClient.readData(topicPath + "/" + partition);
                    // Guard: a partition node may exist without data (e.g. created
                    // but never written); skip it rather than seed a null offset.
                    if (offset != null) {
                        TopicPartition topicPartition =
                                new TopicPartition(topic, Integer.parseInt(partition));
                        offsets.put(topicPartition, offset);
                    }
                }
            }
        }
    }

    /**
     * Builds the Kafka consumer configuration.
     * Auto-commit is disabled because offsets are committed manually to ZooKeeper.
     *
     * @return consumer parameters for {@code ConsumerStrategies.Subscribe}
     */
    public static Map<String, Object> initKafkaParams() {
        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "dc-sit-225:9092");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", groupId);
        kafkaParams.put("auto.offset.reset", "earliest");
        kafkaParams.put("enable.auto.commit", false);
        return kafkaParams;
    }

    /**
     * Persists the position AFTER this record to ZooKeeper and splits the
     * record value into words.
     *
     * FIX: store {@code offset + 1} (the next offset to read), not the offset
     * of the record itself — Subscribe seeks to the stored offset, so storing
     * the raw offset would re-process the last record of every partition on
     * each restart.
     *
     * @param consumerRecord the record just received from Kafka
     * @return iterator over the whitespace-split words of the record value
     */
    public static Iterator<String> updateOffset(ConsumerRecord<String, String> consumerRecord) {
        // partition() already returns int; no boxing needed.
        TopicPartition topicPartition =
                new TopicPartition(consumerRecord.topic(), consumerRecord.partition());
        long nextOffset = consumerRecord.offset() + 1;
        offsets.put(topicPartition, nextOffset);

        String path = rootPath + "/" + consumerRecord.topic() + "/" + consumerRecord.partition();
        if (!zkClient.exists(path)) {
            zkClient.createPersistent(path, true);
        }
        // FIX: always write the offset. The original wrote only when the node
        // pre-existed, so the first offset of a freshly created partition node
        // was silently dropped.
        zkClient.writeData(path, nextOffset);

        return Arrays.asList(consumerRecord.value().split(" ")).iterator();
    }
}