Use demonstration of Hive lateral view

Keywords: hive Oracle MySQL Hadoop

Lateral view is used with split, expand and other UDTF to split a row of data into multiple rows. On this basis, the split data can be aggregated. Lateral view first calls UDTF for each row of the original table, and UDTF will split a row into one or more rows. Then lateral view combines the results to produce a virtual table that supports alias tables.
1. Single laser view
Table and data preparation:
drop table if exists lateral_ex1;
create table lateral_ex1(id string,arrlist array<BIGINT>,text string)
ROW FORMAT DELIMITED                                              
FIELDS TERMINATED BY '\t'                                          
COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;
cat lateral_ex1.txt
190	1030,1031,1032,1033,1190	select id
191	1030,1031,1032,1033,1190	select id
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex1.txt' OVERWRITE INTO TABLE lateral_ex1;  
Query demonstration:
hive> select * from lateral_ex1;
OK
190     [1030,1031,1032,1033,1190]      select id
191     [1030,1031,1032,1033,1190]      select id
Time taken: 0.087 seconds, Fetched: 2 row(s)
select id,arrlist,col2,text from lateral_ex1 LATERAL VIEW explode(arrlist) tab1 as col2;
OK
190     [1030,1031,1032,1033,1190]      1030    select id
190     [1030,1031,1032,1033,1190]      1031    select id
190     [1030,1031,1032,1033,1190]      1032    select id
190     [1030,1031,1032,1033,1190]      1033    select id
190     [1030,1031,1032,1033,1190]      1190    select id
191     [1030,1031,1032,1033,1190]      1030    select id
191     [1030,1031,1032,1033,1190]      1031    select id
191     [1030,1031,1032,1033,1190]      1032    select id
191     [1030,1031,1032,1033,1190]      1033    select id
191     [1030,1031,1032,1033,1190]      1190    select id
Time taken: 73.225 seconds, Fetched: 10 row(s)
2. Multiple later views
Table and data preparation:
drop table if exists lateral_ex2;
create table lateral_ex2(id string,arrlist1 array<BIGINT>,arrlist2 array<STRING>)
ROW FORMAT DELIMITED                                              
FIELDS TERMINATED BY '\t'                                          
COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;
cat lateral_ex2.txt
190	1030,1031,1032,1033,1190	NBA,CX4,AuDi
191	1030,1031,1032,1033,1190	Hive,Oracle,Mysql
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex2.txt' OVERWRITE INTO TABLE lateral_ex2;  
Query demonstration:
hive> select * from lateral_ex2;
OK
190     [1030,1031,1032,1033,1190]      ["NBA","CX4","AuDi"]
191     [1030,1031,1032,1033,1190]      ["Hive","Oracle","Mysql"]
Time taken: 0.053 seconds, Fetched: 2 row(s)

SELECT id,myCol1,myCol2 FROM lateral_ex2
LATERAL VIEW explode(arrlist1) myTable1 AS myCol1
LATERAL VIEW explode(arrlist2) myTable2 AS myCol2;
OK
190     1030    NBA
190     1030    CX4
190     1030    AuDi
190     1031    NBA
190     1031    CX4
190     1031    AuDi
190     1032    NBA
190     1032    CX4
190     1032    AuDi
190     1033    NBA
190     1033    CX4
190     1033    AuDi
190     1190    NBA
190     1190    CX4
190     1190    AuDi
191     1030    Hive
191     1030    Oracle
191     1030    Mysql
191     1031    Hive
191     1031    Oracle
191     1031    Mysql
191     1032    Hive
191     1032    Oracle
191     1032    Mysql
191     1033    Hive
191     1033    Oracle
191     1033    Mysql
191     1190    Hive
191     1190    Oracle
191     1190    Mysql
Time taken: 24.182 seconds, Fetched: 30 row(s)
3. Character row column conversion
Table and data preparation:
drop table if exists lateral_ex3;
create table lateral_ex3(dayno string,col1 string,col2 string)
ROW FORMAT DELIMITED                                              
FIELDS TERMINATED BY '\t'                                          
STORED AS TEXTFILE;
cat lateral_ex3.txt
20141018	aa|bb	7|9|0|3
20141019	cc|dd	6|1|8|5
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex3.txt' OVERWRITE INTO TABLE lateral_ex3;  
Query demonstration:
hive> select * from lateral_ex3;
OK
20141018        aa|bb   7|9|0|3
20141019        cc|dd   6|1|8|5
Time taken: 0.058 seconds, Fetched: 2 row(s)

select dayno,mycol1,mycol2 from lateral_ex3 
lateral view explode(split(col1,"\\|")) tb1 as mycol1
lateral view explode(split(col2,"\\|")) tb2 as mycol2;
OK
20141018        aa      7
20141018        aa      9
20141018        aa      0
20141018        aa      3
20141018        bb      7
20141018        bb      9
20141018        bb      0
20141018        bb      3
20141019        cc      6
20141019        cc      1
20141019        cc      8
20141019        cc      5
20141019        dd      6
20141019        dd      1
20141019        dd      8
20141019        dd      5
Time taken: 28.739 seconds, Fetched: 16 row(s)

Posted by ayzee01 on Sat, 02 May 2020 19:41:55 -0700