Hadoop Pitfalls (4)
HBase Client Programming (Eclipse)
Environment
For the installation and configuration of HBase and the configuration of Eclipse, please refer to the previous two articles in this series.
This series uses HBase 1.4.13.
This series uses Hadoop 2.8.5.
Remember to adjust package names, server addresses, and other site-specific settings to your own environment.
Importing the JAR packages
The client-related JAR packages shipped with HBase need to be added to the project's Build Path.
In theory only the packages under org.apache.hadoop.hbase.* should be needed, but in practice some were still missing, so I simply imported every JAR under HBase's lib directory.
This crude approach is due to my limited knowledge of Java development; experienced readers should be able to import only the JARs they actually need.
Table creation and deletion
The structure of the example Student table is shown in the figure below: the row key is the student number, the Std column family holds the student's basic information (name, gender, birth date, department), and the Course column family holds the course scores (math, arts, phy), keeping up to three historical versions.
The code is as follows
package wit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class HBaseTest {
    // Declare the static HBase configuration
    static Configuration cfg = HBaseConfiguration.create();

    // Create the Student table
    public static void createStdTable() throws Exception {
        cfg.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        // Table name
        String tablename = "Student";
        // Column family names
        String[] columnFamilys = new String[] {"Std", "Course"};
        // Establish a connection
        Connection con = ConnectionFactory.createConnection(cfg);
        // Get the Admin object
        Admin admin = con.getAdmin();
        // Get the table name object
        TableName tName = TableName.valueOf(tablename);
        // Check whether the table already exists
        if (admin.tableExists(tName)) {
            System.out.println("table Exists!");
            System.exit(0);
        } else {
            HTableDescriptor tableDesc = new HTableDescriptor(tName);
            // Add the column families
            for (String cf : columnFamilys) {
                HColumnDescriptor cfDesc = new HColumnDescriptor(cf);
                if (cf.equals("Course")) // keep up to 3 historical versions for Course
                    cfDesc.setMaxVersions(3);
                tableDesc.addFamily(cfDesc);
            }
            // Create the table
            admin.createTable(tableDesc);
            System.out.println("create table success!");
        }
        admin.close();
        con.close();
    }

    public static void main(String[] agrs) throws Throwable {
        try {
            createStdTable();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Note the line
cfg.set("hbase.zookeeper.quorum","hadoop1-ali,hadoop2-hw");
In theory the server settings in the imported hbase-site.xml file should be picked up as the project configuration, but I could not get that to work with my limited skills, so the quorum is set manually in the code; the same approach is used in the code that follows.
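For reference, here is a minimal sketch of loading the file directly (my own addition, not from the original article; the path below is hypothetical). HBaseConfiguration.create() already reads hbase-site.xml if the file is on the project classpath, and Configuration.addResource can load it from an explicit location.

// Sketch: load hbase-site.xml instead of hard-coding the quorum.
// HBaseConfiguration.create() picks the file up automatically when it is on the classpath.
Configuration cfg = HBaseConfiguration.create();
// Or load it explicitly from a known location (path is hypothetical):
cfg.addResource(new org.apache.hadoop.fs.Path("/path/to/hbase-site.xml"));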
Running the code above prints "create table success!", which indicates the table was created.
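As a quick sanity check, the Admin API can also list all tables; a short sketch (my addition) that reuses cfg and the imports from the class above:

// Sketch: list all tables to confirm that "Student" now exists.
public static void listTables() throws Exception {
    Connection con = ConnectionFactory.createConnection(cfg);
    Admin admin = con.getAdmin();
    for (TableName t : admin.listTableNames()) {
        System.out.println(t.getNameAsString());
    }
    admin.close();
    con.close();
}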
The code to delete the table is as follows
package wit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class Delete {
    // Declare the static HBase configuration
    static Configuration cfg = HBaseConfiguration.create();

    public static void DeleteTable() throws Exception {
        cfg.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        Connection con = ConnectionFactory.createConnection(cfg);
        // Get the table name object
        TableName tablename = TableName.valueOf("Student");
        // Get the Admin object
        Admin admin = con.getAdmin();
        if (admin.tableExists(tablename)) {
            try {
                // A table must be disabled before it can be deleted
                admin.disableTable(tablename);
                admin.deleteTable(tablename);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        admin.close();
        con.close();
    }

    public static void main(String[] agrs) throws Throwable {
        try {
            DeleteTable();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Table schema modification
Adding a new column family
The following code adds a new column family, Test, to the table.
public static void AddStdColFamily() throws Throwable {
    Connection con = ConnectionFactory.createConnection(cfg);
    // Get the table name object
    TableName tablename = TableName.valueOf("Student");
    // Get the Admin object
    Admin admin = con.getAdmin();
    // Descriptor for the new column family "Test", keeping up to 3 versions
    HColumnDescriptor newCol = new HColumnDescriptor("Test");
    newCol.setMaxVersions(3);
    if (admin.tableExists(tablename)) {
        try {
            admin.disableTable(tablename);
            admin.addColumn(tablename, newCol);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
    admin.enableTable(tablename);
    admin.close();
    con.close();
}
Modifying column family properties
Change the Test column family so that it keeps at most 5 historical versions.
public static void ModifyStdColFamily() throws Throwable {
    Connection con = ConnectionFactory.createConnection(cfg);
    // Get the table name object
    TableName tablename = TableName.valueOf("Student");
    // Get the Admin object
    Admin admin = con.getAdmin();
    // New descriptor for the "Test" column family with maxVersions = 5
    HColumnDescriptor modCol = new HColumnDescriptor("Test");
    modCol.setMaxVersions(5);
    if (admin.tableExists(tablename)) {
        try {
            admin.disableTable(tablename);
            admin.modifyColumn(tablename, modCol);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
    admin.enableTable(tablename);
    admin.close();
    con.close();
}
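To confirm that the change took effect, the table descriptor can be read back and the maxVersions value printed. A sketch (my addition) that reuses the same cfg and imports as the class above:

// Sketch: print each column family and its maxVersions setting.
public static void showColumnFamilies() throws Exception {
    Connection con = ConnectionFactory.createConnection(cfg);
    Admin admin = con.getAdmin();
    HTableDescriptor desc = admin.getTableDescriptor(TableName.valueOf("Student"));
    for (HColumnDescriptor cf : desc.getColumnFamilies()) {
        System.out.println(cf.getNameAsString() + " maxVersions=" + cf.getMaxVersions());
    }
    admin.close();
    con.close();
}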
Deleting a column family
Delete the Test column family.
public static void DeleteStdColFamily() throws Throwable {
    Connection con = ConnectionFactory.createConnection(cfg);
    // Get the table name object
    TableName tablename = TableName.valueOf("Student");
    // Get the Admin object
    Admin admin = con.getAdmin();
    if (admin.tableExists(tablename)) {
        try {
            admin.disableTable(tablename);
            // requires: import org.apache.hadoop.hbase.util.Bytes;
            admin.deleteColumn(tablename, Bytes.toBytes("Test"));
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
    admin.enableTable(tablename);
    admin.close();
    con.close();
}
Insert and modify data in the table (omitted)
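The original article skips this step; for completeness, here is a minimal sketch (my addition, not the author's code) of writing and reading a single row with the HBase 1.x client API. Besides the imports already shown, it needs org.apache.hadoop.hbase.client.Table, Put, Get, Result and org.apache.hadoop.hbase.util.Bytes.

// Sketch: insert one row into Student and read it back.
public static void putAndGetExample() throws Exception {
    Connection con = ConnectionFactory.createConnection(cfg);
    Table table = con.getTable(TableName.valueOf("Student"));
    // The row key is the student number
    Put put = new Put(Bytes.toBytes("200215125"));
    put.addColumn(Bytes.toBytes("Std"), Bytes.toBytes("Name"), Bytes.toBytes("Jim"));
    put.addColumn(Bytes.toBytes("Course"), Bytes.toBytes("math"), Bytes.toBytes(89L));
    table.put(put);
    // Read the row back
    Get get = new Get(Bytes.toBytes("200215125"));
    Result result = table.get(get);
    System.out.println("Name: "
            + Bytes.toString(result.getValue(Bytes.toBytes("Std"), Bytes.toBytes("Name"))));
    System.out.println("math: "
            + Bytes.toLong(result.getValue(Bytes.toBytes("Course"), Bytes.toBytes("math"))));
    table.close();
    con.close();
}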
Integration with MapReduce
Reading a file from HDFS and writing it into the table
The HDFS file Std.txt contains:
200215125, Jim, Male, 2008-12-09, CS, 89, 78, 56
200215126, Marry, Female, 2001-2-09, AI, 79, 72, 66
200215127, Marker, Male, 2003-12-19, CE, 78, 48, 36
Note that the file must not contain blank lines, otherwise an error is thrown while reading the data (a simple guard is sketched after the code below).
The code is as follows. The map step reads each line of Std.txt, uses the student number as the key and the rest of the student's information as the value, and writes out this intermediate result. The reduce step writes the intermediate results produced by the map into the Student table in HBase, so the reducer extends TableReducer and is wired into the job in the main function via TableMapReduceUtil.initTableReducerJob.
package wit;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class StdHdfsToHBase {

    public static class HDFSMap extends Mapper<Object, Text, Text, Text> {
        // The map function reads one line of Std.txt at a time
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // The student number becomes the rowKey
            String stdRowKey = value.toString().split(",")[0];
            System.out.println(stdRowKey);
            // Everything after the student number becomes the value
            String stdInfo = value.toString().substring(stdRowKey.length() + 1);
            System.out.println(stdInfo);
            context.write(new Text(stdRowKey), new Text(stdInfo));
        }
    }

    public static class HDFSReducer extends TableReducer<Text, Text, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Note: Text.getBytes() returns the whole backing buffer;
            // Bytes.toBytes(key.toString()) would be the safer choice here.
            Put put = new Put(key.getBytes());
            for (Text val : values) {
                String[] stdInfo = val.toString().split(",");
                // trim() removes the spaces that follow the commas in Std.txt
                put.addColumn("Std".getBytes(), "Name".getBytes(), stdInfo[0].trim().getBytes());
                put.addColumn("Std".getBytes(), "gender".getBytes(), stdInfo[1].trim().getBytes());
                put.addColumn("Std".getBytes(), "birth".getBytes(), stdInfo[2].trim().getBytes());
                put.addColumn("Std".getBytes(), "dept".getBytes(), stdInfo[3].trim().getBytes());
                put.addColumn("Course".getBytes(), "math".getBytes(), Bytes.toBytes(Long.parseLong(stdInfo[4].trim())));
                put.addColumn("Course".getBytes(), "arts".getBytes(), Bytes.toBytes(Long.parseLong(stdInfo[5].trim())));
                put.addColumn("Course".getBytes(), "phy".getBytes(), Bytes.toBytes(Long.parseLong(stdInfo[6].trim())));
                // Write the student's row into the HBase table
                context.write(new ImmutableBytesWritable(key.getBytes()), put);
            }
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1-ali,hadoop2-hw");
        Job job = Job.getInstance(conf, "StdHdfsToHBase");
        job.setJarByClass(StdHdfsToHBase.class);
        // Set up the mapper
        job.setMapperClass(HDFSMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Set up the reducer that writes to the Student table
        TableMapReduceUtil.initTableReducerJob("Student", HDFSReducer.class, job);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        // Input directory that holds Std.txt
        FileInputFormat.addInputPath(job, new Path("hdfs://hadoop1-ali:9000/input/std"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
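As noted above, a blank line in Std.txt makes the job fail. A simple guard in the map function, sketched here as one possible fix (my addition, not part of the original code), is to skip empty lines before splitting:

// Sketch: skip blank lines so the job does not fail on empty input.
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    String line = value.toString().trim();
    if (line.isEmpty()) {
        return; // ignore blank lines
    }
    String stdRowKey = line.split(",")[0];
    String stdInfo = line.substring(stdRowKey.length() + 1);
    context.write(new Text(stdRowKey), new Text(stdInfo));
}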
Original from Chen Xi's blog