HBase client programming (Eclipse)

Keywords: Java, HBase, Hadoop, Apache ZooKeeper

Hadoop Pitfalls (4)


Environment

For the installation and configuration of HBase and the configuration of Eclipse, please refer to the previous two articles in this series.

The HBase version used in this series is 1.4.13

The Hadoop version used in this series is 2.8.5

Please adjust site-specific settings such as package names and server hostnames to match your own environment.

Importing the jar packages

The HBase client jar packages need to be added to the project's Build Path.

In theory only the jars that provide the org.apache.hadoop.hbase.* packages need to be imported, but in practice dependencies are still missing, so all of the jar files under HBase's lib directory are imported instead.

This brute-force approach is due to my limited knowledge of Java development; experienced readers should be able to add only the jars they actually need.

Table creation and deletion

The example Student table uses the student number as the row key and has two column families: Std (Name, gender, birth, dept) and Course (math, arts, phy, keeping up to 3 historical versions).

The code is as follows

package wit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class HBaseTest {
    //Declare the static configuration HBaseConfiguration
    static Configuration cfg = HBaseConfiguration.create();

    //Create the Student table
    public static void createStdTable() throws Exception {
        cfg.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        //Table name
        String tablename = "Student";
        //List of column family names
        String[] columnFamilys = new String[] {"Std", "Course"};
        //Establish a connection
        Connection con = ConnectionFactory.createConnection(cfg);
        //Get the Admin object
        Admin admin = con.getAdmin();
        //Get the table name object
        TableName tName = TableName.valueOf(tablename);
        //Check whether the table already exists
        if (admin.tableExists(tName)) {
            System.out.println("table Exists!");
        } else {
            HTableDescriptor tableDesc = new HTableDescriptor(tName);
            //Add the column families
            for (String cf : columnFamilys) {
                HColumnDescriptor cfDesc = new HColumnDescriptor(cf);
                if (cf.equals("Course")) //Keep up to 3 historical versions of Course
                    cfDesc.setMaxVersions(3);
                tableDesc.addFamily(cfDesc);
            }
            //Create the table
            admin.createTable(tableDesc);
            System.out.println("create table success!");
        }
        admin.close();
        con.close();
    }

    public static void main(String[] args) throws Throwable {
        try {
            createStdTable();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

In the code above, the line

cfg.set("hbase.zookeeper.quorum","hadoop1-ali,hadoop2-hw");

sets the ZooKeeper quorum by hand. In theory the project should pick up the server settings from the hbase-site.xml file imported into it, but I could not get that to work, so the settings are configured manually in the code instead; the same approach is used in the code that follows.
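As an alternative to calling cfg.set(), the cluster settings could in principle be loaded from the hbase-site.xml file itself. The following is only a minimal sketch of that idea, not the original article's code; the class name and the file path are assumptions that must be adjusted to your own project.

package wit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class ConfigFromSiteFile {
    //Build a Configuration from hbase-site.xml instead of hard-coding the quorum
    public static Configuration load() {
        //create() already reads hbase-site.xml if the file is on the classpath
        Configuration cfg = HBaseConfiguration.create();
        //Otherwise the file can be added explicitly; the path here is only an example
        cfg.addResource(new Path("/path/to/hbase-site.xml"));
        return cfg;
    }
}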

Run the code above; the message "create table success!" indicates that the table was created successfully.

The code to delete the table is as follows

package wit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class Delete {
    //Declare the static configuration HBaseConfiguration
    static Configuration cfg = HBaseConfiguration.create();

    public static void DeleteTable() throws Exception {
        cfg.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        Connection con = ConnectionFactory.createConnection(cfg);
        //Get the table name object
        TableName tablename = TableName.valueOf("Student");
        //Get the Admin object
        Admin admin = con.getAdmin();
        if (admin.tableExists(tablename)) {
            try {
                //A table must be disabled before it can be deleted
                admin.disableTable(tablename);
                admin.deleteTable(tablename);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        admin.close();
        con.close();
    }

    public static void main(String[] args) throws Throwable {
        try {
            DeleteTable();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Table schema modification

Adding a new column family

The following code adds a new column family named Test to the table.

public static void AddStdColFamily() throws Throwable {
    Connection con = ConnectionFactory.createConnection(cfg);
    //Get the table name object
    TableName tablename = TableName.valueOf("Student");
    //Get the Admin object
    Admin admin = con.getAdmin();
    HColumnDescriptor newCol = new HColumnDescriptor("Test");
    newCol.setMaxVersions(3);
    if (admin.tableExists(tablename)) {
        try {
            admin.disableTable(tablename);
            admin.addColumn(tablename, newCol);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        //Re-enable the table after the schema change
        admin.enableTable(tablename);
    }
    admin.close();
    con.close();
}

Modifying column family properties

The following code changes the maximum number of historical versions kept by the Test column family to 5.

public static void ModifyStdColFamily() throws Throwable {
    Connection con = ConnectionFactory.createConnection(cfg);
    //Get the table name object
    TableName tablename = TableName.valueOf("Student");
    //Get the Admin object
    Admin admin = con.getAdmin();
    HColumnDescriptor modCol = new HColumnDescriptor("Test");
    modCol.setMaxVersions(5);
    if (admin.tableExists(tablename)) {
        try {
            admin.disableTable(tablename);
            admin.modifyColumn(tablename, modCol);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        //Re-enable the table after the schema change
        admin.enableTable(tablename);
    }
    admin.close();
    con.close();
}

Deleting a column family

The following code deletes the Test column family.

public static void DeleteStdColFamily() throws Throwable {
    Connection con = ConnectionFactory.createConnection(cfg);
    //Get the table name object
    TableName tablename = TableName.valueOf("Student");
    //Get the Admin object
    Admin admin = con.getAdmin();
    if (admin.tableExists(tablename)) {
        try {
            admin.disableTable(tablename);
            //Bytes is org.apache.hadoop.hbase.util.Bytes
            admin.deleteColumn(tablename, Bytes.toBytes("Test"));
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        //Re-enable the table after the schema change
        admin.enableTable(tablename);
    }
    admin.close();
    con.close();
}

Inserting and modifying data in the table (omitted)
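The original article omits this part, but for completeness here is a minimal sketch of inserting and reading a single row with the standard Table/Put/Get client API. The class name is made up and the sample values are taken from the Std.txt data used later in this article; this is not the original author's code.

package wit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class PutGetSketch {
    public static void main(String[] args) throws Exception {
        Configuration cfg = HBaseConfiguration.create();
        cfg.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        try (Connection con = ConnectionFactory.createConnection(cfg);
             Table table = con.getTable(TableName.valueOf("Student"))) {
            //Insert (or overwrite) one cell of one row
            Put put = new Put(Bytes.toBytes("200215125"));
            put.addColumn(Bytes.toBytes("Std"), Bytes.toBytes("Name"), Bytes.toBytes("Jim"));
            table.put(put);
            //Read the row back
            Get get = new Get(Bytes.toBytes("200215125"));
            Result result = table.get(get);
            byte[] name = result.getValue(Bytes.toBytes("Std"), Bytes.toBytes("Name"));
            System.out.println("Name = " + Bytes.toString(name));
        }
    }
}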

Integration with MapReduce

Writing to the HBase table after reading an HDFS file

The content of the HDFS file Std.txt is

200215125, Jim, Male, 2008-12-09, CS, 89, 78, 56
200215126, Marry, Female, 2001-2-09, AI , 79, 72, 66
200215127, Marker, Male, 2003-12-19, CE, 78, 48, 36

Note that this file must not contain blank lines; otherwise an error will be thrown when the data is read.

The code is as follows. The Map phase reads each line of Std.txt, takes the student number as the key and the rest of the student's information as the value, and writes out this intermediate result. The Reduce phase writes the intermediate results produced by Map into the Student table in HBase, so the reducer extends TableReducer and is registered in the main function with TableMapReduceUtil.initTableReducerJob.

package wit;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class StdHdfsToHBase {
    public static class HDFSMap extends Mapper<Object, Text, Text, Text> {
        //The map function reads each line of the Std.txt file
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            //Use the student number as the row key (a blank line would fail here)
            String stdRowKey = value.toString().split(",")[0];
            System.out.println(stdRowKey);
            //The rest of the line, after the student number, is the value
            String stdInfo = value.toString().substring(stdRowKey.length() + 1);
            System.out.println(stdInfo);
            context.write(new Text(stdRowKey), new Text(stdInfo));
        }
    }

    public static class HDFSReducer extends TableReducer<Text, Text, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            //Use the student number as the row key of the Put
            byte[] rowKey = Bytes.toBytes(key.toString());
            Put put = new Put(rowKey);
            for (Text val : values) {
                String[] stdInfo = val.toString().split(",");
                //trim() removes the spaces that follow the commas in Std.txt
                put.addColumn("Std".getBytes(), "Name".getBytes(), stdInfo[0].trim().getBytes());
                put.addColumn("Std".getBytes(), "gender".getBytes(), stdInfo[1].trim().getBytes());
                put.addColumn("Std".getBytes(), "birth".getBytes(), stdInfo[2].trim().getBytes());
                put.addColumn("Std".getBytes(), "dept".getBytes(), stdInfo[3].trim().getBytes());
                put.addColumn("Course".getBytes(), "math".getBytes(), Bytes.toBytes(Long.parseLong(stdInfo[4].trim())));
                put.addColumn("Course".getBytes(), "arts".getBytes(), Bytes.toBytes(Long.parseLong(stdInfo[5].trim())));
                put.addColumn("Course".getBytes(), "phy".getBytes(), Bytes.toBytes(Long.parseLong(stdInfo[6].trim())));
                //Write the student information to the HBase table
                context.write(new ImmutableBytesWritable(rowKey), put);
            }
        }
    }

    public static void main(String[] args) throws
            IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        Job job = Job.getInstance(conf, "StdHdfsToHBase");
        job.setJarByClass(StdHdfsToHBase.class);
        //Set up the Mapper
        job.setMapperClass(HDFSMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        //Set up the Reducer, writing to the Student table
        TableMapReduceUtil.initTableReducerJob("Student", HDFSReducer.class, job);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        //Set the input directory that contains Std.txt
        FileInputFormat.addInputPath(job, new Path("hdfs://hadoop1-ali:9000/input/std"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
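Once the job completes, one way to check the result is to scan the Student table from a small client program. The sketch below is not part of the original article; it only assumes the table, column family, and column names used in the code above.

package wit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanStudent {
    public static void main(String[] args) throws Exception {
        Configuration cfg = HBaseConfiguration.create();
        cfg.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        try (Connection con = ConnectionFactory.createConnection(cfg);
             Table table = con.getTable(TableName.valueOf("Student"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            //Print the row key, name, and math score of every row written by the job
            for (Result row : scanner) {
                String name = Bytes.toString(row.getValue(Bytes.toBytes("Std"), Bytes.toBytes("Name")));
                long math = Bytes.toLong(row.getValue(Bytes.toBytes("Course"), Bytes.toBytes("math")));
                System.out.println(Bytes.toString(row.getRow()) + " " + name + " math=" + math);
            }
        }
    }
}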

Original article from Chen Xi's blog.
