### Observer Coprocessor of HBase

Keywords: Big Data, HBase, Apache Hadoop, Java

  • Create a custom Java class that extends BaseRegionObserver

package com.charley.example.hbase2es;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;

/**
 * author: Charley
 * 
 */
public class ESObserver extends BaseRegionObserver {

    private TransportClient client = null;

    private String clusterName = null;
    private String nodeHost = null;
    private int nodePort = 0;
    private String indexName = null;
    private String typeName = null;

    /**
     * Called only once in the lifecycle
     * Suitable for initialization work, such as opening client connections
     */
    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
        super.start(e);
        init(e);
        Settings settings = Settings.builder().put("cluster.name", clusterName).put("client.transport.sniff", true).build();
        try {
            client = new PreBuiltTransportClient(settings).addTransportAddress(new TransportAddress(InetAddress.getByName(nodeHost), nodePort));
        } catch (UnknownHostException ex) {
            // fail fast: leaving client null would cause an NPE later in postPut
            throw new IOException(ex);
        }
    }

    /**
     * Reads the parameters passed in when the observer is added to the HBase table
     */
    private void init(CoprocessorEnvironment e) throws IOException {
        clusterName = e.getConfiguration().get("cluster");
        nodeHost = e.getConfiguration().get("host");
        nodePort = e.getConfiguration().getInt("port", 9300);
        indexName = e.getConfiguration().get("index");
        typeName = e.getConfiguration().get("type");
    }

    /**
     * Called many times in the lifecycle: once for every Put on the table this observer is attached to
     * This is the core method that synchronizes data to ES
     */
    @Override
    public void postPut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit, Durability durability) throws IOException {
        super.postPut(e, put, edit, durability);
        String indexId = Bytes.toString(put.getRow()); // use the rowkey as the ES document id
        NavigableMap<byte[], List<Cell>> familyMap = put.getFamilyCellMap();
        Map<String, Object> json = new HashMap<>();
        for (Map.Entry<byte[], List<Cell>> entry : familyMap.entrySet()) {
            for (Cell cell : entry.getValue()) {
                String key = Bytes.toString(CellUtil.cloneQualifier(cell));
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                json.put(key, value);
            }
        }
        //Write data to ES
        client.prepareIndex(indexName, typeName, indexId).setSource(json, XContentType.JSON).get();
        //The data could also be written elsewhere, such as MySQL or a custom log, as needed
    }

    /**
     * Called only once in the lifecycle
     * Suitable for cleanup, such as releasing the ES client connection
     */
    @Override
    public void stop(CoprocessorEnvironment e) throws IOException {
        if (client != null) {
            client.close();
        }
        super.stop(e);
    }

}
  • Package the Java code into a jar and copy it to HDFS, e.g. jar_hdfs_path = hdfs:///jar/hbase2es.jar
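
For reference, a minimal build-and-upload sequence might look like the following (a sketch: the mvn and hdfs commands are assumptions about the build setup; the jar name and HDFS path come from the step above):

    # build the jar; hbase-server and the ES transport client must be on the compile classpath
    mvn clean package
    # upload to HDFS so every region server can load the same jar
    hdfs dfs -mkdir -p /jar
    hdfs dfs -put -f target/hbase2es.jar hdfs:///jar/hbase2es.jar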

  • Add coprocessor to hbase table

    • Add coprocessor
      • Replace hbase_table_name with the HBase table to which you want to add the coprocessor
      • METHOD => 'table_att'
      • 'coprocessor'=>'jar_file_path|observer_class_path|priority|params'
        • There are four parts in total, separated by |; the trailing params may be omitted
        • jar_file_path: the absolute path of the jar built from the Java code, preferably an HDFS path
        • observer_class_path: the fully qualified name of the observer class (package name plus class name)
        • priority: the fixed value 1001 is fine
        • params: parameters passed to the observer, equivalent to a map, e.g. id=123,name=haha,age=18
    create 'hbase_table_name','ow'
    disable 'hbase_table_name'
    alter 'hbase_table_name', METHOD => 'table_att', 'coprocessor' => 'hdfs:///jar/hbase2es.jar|com.charley.example.hbase2es.ESObserver|1001|cluster=my_cluster,host=192.168.100.100,port=9300,index=index_test,type=type_test'
    enable 'hbase_table_name'
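
    To confirm the coprocessor was attached, describe the table and look for the coprocessor$1 attribute in its table attributes:

    describe 'hbase_table_name'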
    
    • Cancel coprocessor
    disable 'hbase_table_name'
    alter 'hbase_table_name', METHOD => 'table_att_unset', NAME => 'coprocessor$1'
    enable 'hbase_table_name'
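
    Running describe 'hbase_table_name' again should show that the coprocessor$1 attribute has been removed.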
    
  • The coprocessor is now in place. Insert a row into the HBase table 'hbase_table_name' and the corresponding document appears in ES as well; HBase thus synchronizes data to ES, implementing a secondary index for HBase.
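
A quick end-to-end test might look like this (a sketch: the row key and value are made up, the 'ow' column family comes from the create statement above, and ES's REST port is assumed to be the default 9200; the 9300 configured earlier is the transport port):

    # in the hbase shell: insert a row into the observed table
    put 'hbase_table_name', 'row1', 'ow:name', 'haha'
    # from a terminal: fetch the synced document from ES
    curl 'http://192.168.100.100:9200/index_test/type_test/row1'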

Note: if the observer added the first time has a problem, you must rename the jar package when you rebuild and upload it the second time; otherwise the new version may not take effect, because the region server may still be using the previously loaded jar.

Posted by kurtsu on Tue, 10 Dec 2019 04:29:07 -0800