[Hard Hive] MYSQL/HIVESQL Pen Test: HIVESQL

Keywords: MySQL hive

Welcome to the blog home page: WeChat Search: import_bigdata, original hard core creator of big data domain _import_bigdata_CSDN blog
Welcome to comment, collection, message, message exchange!
This article was originally created by Wang Zhiwu and first published in the CSDN blog!
The first CSDN forum of this article is forbidden to be reproduced without official and personal permission.

This article is about Learning Guide for Big Data Specialists from Zero (Full Upgrade) Part of HIV supplement.

4. Handwritten HQL Issue 4

A table STG.ORDER is known with the following fields: Date, Order_id, User_id, amount. Please give sql for statistics: sample data: 2017-01-01, 10029028, 1000003251, 33.57.

1) Give the number of orders, users and total transaction amount for each month in 2017.

2) Give the number of new customers in November 2017 (i.e. the first order is in November)

Building tables

create table order_tab(dt string,order_id string,user_id string,amount decimal(10,2)) row format delimited fields terminated by '\t';

1) Give the number of orders, users and total transaction amount for each month in 2017.

select
   date_format(dt,'yyyy-MM'),
   count(order_id),
   count(distinct user_id),
   sum(amount)
from
   order_tab
where
   date_format(dt,'yyyy')='2017'
group by
   date_format(dt,'yyyy-MM');

2) Give the number of new customers in November 2017 (i.e. the first order is in November)

select
   count(user_id)
from
   order_tab
group by
   user_id
having
   date_format(min(dt),'yyyy-MM')='2017-11';

5. Handwritten HQL Question 5

There are logs below. Write a code to find the total and average age of all users and active users. (Active users refer to users who have access records for two consecutive days) Date User Age

data set

2019-02-11,test_1,23
2019-02-11,test_2,19
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-12,test_2,19
2019-02-13,test_1,23
2019-02-15,test_2,19
2019-02-16,test_2,19

1) Table building

create table user_age(dt string,user_id string,age int)row format delimited fields terminated by ',';

2) Grouped by date and user, sorted by date and ranked

select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id;t1

3) Calculate the difference between date and rank

select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   t1;t2

4) Filter traveling values greater than or equal to 2, i.e. active users for two consecutive days

select
   user_id,
   min(age) age
from
   t2
group by
   user_id,flag
having
   count(*)>=2;t3

5) De-process the data (a user can log in continuously at two different times), for example, user a logs in on January 11, 10, and on January 20 and 24, 21.

select
   user_id,
   min(age) age
from
   t3
group by
   user_id;t4
6)Calculate the number of active users (two consecutive days of visits) and average age
select
   count(*) ct,
   cast(sum(age)/count(*) as decimal(10,2))
from t4;

7) Weighting the entire dataset by user

select
   user_id,
   min(age) age 
from
   user_age 
group by 
   user_id;t5

8) Calculate the number and average age of all users

select
   count(*) user_count,
   cast((sum(age)/count(*)) as decimal(10,1)) 
from 
   t5;

9) union all operation on step 5 and step 7 datasets

select
   0 user_total_count,
   0 user_total_avg_age,
   count(*) twice_count,
   cast(sum(age)/count(*) as decimal(10,2)) twice_count_avg_age
from 
(
   select
   user_id,
   min(age) age
from
   (select
   user_id,
   min(age) age
from
   (
    select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (
    select
       dt,
       user_id,
       min(age) age,
       rank() over(partition by user_id order by dt) rk
    from
       user_age
    group by
       dt,user_id
    )t1
    )t2
group by
   user_id,flag
having
   count(*)>=2)t3
group by
   user_id 
)t4

union all

select
   count(*) user_total_count,
   cast((sum(age)/count(*)) as decimal(10,1)),
   0 twice_count,
   0 twice_count_avg_age
from 
   (
      select
         user_id,
         min(age) age 
      from 
         user_age 
      group by 
         user_id
   )t5;t6

10) Sum and stitch into final SQL

select 
    sum(user_total_count),
    sum(user_total_avg_age),
    sum(twice_count),
    sum(twice_count_avg_age)
from 
(select
   0 user_total_count,
   0 user_total_avg_age,
   count(*) twice_count,
   cast(sum(age)/count(*) as decimal(10,2)) twice_count_avg_age
from 
(
   select
   user_id,
   min(age) age
from
   (select
   user_id,
   min(age) age
from
   (
    select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (
    select
       dt,
       user_id,
       min(age) age,
       rank() over(partition by user_id order by dt) rk
    from
       user_age
    group by
       dt,user_id
    )t1
    )t2
group by
   user_id,flag
having
   count(*)>=2)t3
group by
   user_id 
)t4

union all

select
   count(*) user_total_count,
   cast((sum(age)/count(*)) as decimal(10,1)),
   0 twice_count,
   0 twice_count_avg_age
from 
   (
      select
         user_id,
         min(age) age 
      from 
         user_age 
      group by 
         user_id
   )t5)t6;

6. Handwritten HQL Title 6

Please use sql to write down the amount of goods that all users first purchased in October this year, in the order table field (purchasing user: userid, amount: money, purchasing time: paymenttime (format: 2017-10-01), order id:orderid)

1) Table building

create table ordertable(
    userid string,
    money int,
    paymenttime string,
    orderid string)
row format delimited fields terminated by '\t';

2) Query out

select
   userid,
   min(paymenttime) paymenttime
from
   ordertable
where
   date_format(paymenttime,'yyyy-MM')='2017-10'
group by
   userid;t1
select
   t1.userid,
   t1.paymenttime,
   od.money
from
   t1
join
   ordertable od
on
   t1.userid=od.userid
   and
   t1.paymenttime=od.paymenttime;
select
   t1.userid,
   t1.paymenttime,
   od.money
from
   (select
   userid,
   min(paymenttime) paymenttime
from
   ordertable
where
   date_format(paymenttime,'yyyy-MM')='2017-10'
group by
   userid)t1
join
   ordertable od
on
   t1.userid=od.userid
   and
   t1.paymenttime=od.paymenttime;

 

Posted by monkeymade on Fri, 10 Sep 2021 10:09:26 -0700