hive sql 50 exercises

Keywords: Database hive SQL

Database and table building

--Build database
create database test;

--Build table
create table student(s_id string,s_name string,s_birth string,s_sex string) row format delimited fields terminated by '\t';
create table course(c_id string,c_name string,t_id string) row format delimited fields terminated by '\t';
create table teacher(t_id string,t_name string) row format delimited fields terminated by '\t';
create table score(s_id string,c_id string,s_score int) row format delimited fields terminated by '\t';

Generate data
vim /opt/module/test/course.txt

01	chinese	02
02 	mathematics	01
03	English	03

vim /opt/module/test/score.txt

01	01	80
01	02	90
01	03	99
02	01	70
02	02	60
02	03	80
03	01	80
03	02	80
03	03	80
04	01	50
04	02	30
04	03	20
05	01	76
05	02	87
06	01	31
06	03	34
07	02	89
07	03	98

vim /opt/module/test/student.txt

01	Lei Zhao	1990-01-01	male
02	Qian Dian	1990-12-21	male
03	Sun Feng	1990-05-20	male
04	Li Yun	1990-08-06	male
05	Zhou Mei	1991-12-01	female
06	Wu Lan	1992-03-01	female
07	Zheng Zhu	1989-07-01	female
08	Ju Wang	1990-01-20	female

vim /opt/module/test/teacher.txt

01	Zhang San
02	Li Si
03	Wang Wu

Import data

--Import data
load data local inpath '/opt/module/test/student.txt' into table student;
load data local inpath '/opt/module/test/course.txt' into table course;
load data local inpath '/opt/module/test/teacher.txt' into table teacher;
load data local inpath '/opt/module/test/score.txt' into table score;
  1. Query the information and course scores of students with higher scores in "01" than "02":
--Mode 1: direct connection sc1 and sc2
select
    sc1.s_id,
    sc1.s_score score01,
    sc2.s_score score02
from score sc1
join score sc2 on sc1.s_id=sc2.s_id
where sc1.c_id='01' and sc2.c_id='02' and sc1.s_score>sc2.s_score;

--Mode 2
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on student.s_id = s1.s_id and s1.c_id='01'
left join score s2 on student.s_id = s2.s_id and s2.c_id='02'
where s1.s_score>s2.s_score;

--Mode 3
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on student.s_id = s1.s_id
left join score s2 on student.s_id = s2.s_id
where s1.c_id='01' and s2.c_id='02' and s1.s_score>s2.s_score;

--Mode 4
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on s1.c_id='01'
left join score s2 on s2.c_id='02'
where student.s_id = s1.s_id and student.s_id = s2.s_id and s1.s_score>s2.s_score;
  1. Query the information and course scores of students with lower scores in "01" than "02": (the same as question 1 above)
select
    student.*,
    s1.s_score score01,
    s2.s_score score02
from student
join score s1 on student.s_id = s1.s_id and s1.c_id='01'
left join score s2 on s1.s_id = s2.s_id and s2.c_id='02'
where s1.s_score<s2.s_score;
  1. Query the student number, student name and average score of students whose average score is greater than or equal to 60:
--Mode 1: round Function can be removed null value
select
    t1.sid,
    t1.sname,
    t1.avg_score
from
(
    select
        student.s_id sid,
        student.s_name sname,
        round(avg(s.s_score),1)  avg_score
    from student
    join score s on student.s_id = s.s_id
    group by student.s_id,student.s_name
)t1
where avg_score>=60
order by t1.sid;

--Method 2: This is better
--Step 1: query each sid Corresponding GPA (pass) round Function removal null Value, and take the decimal point (2 digits) as a temporary table tmp
--Step 2: transfer student Table and tmp Table and filter out the average score avg_score Information larger than 60.
select
    student.s_id,
    student.s_name,
    tmp.avg_score
from student
join
(
    select
        s_id,
        round(avg(s_score),2) avg_score
    from score
    group by s_id
)tmp
on student.s_id=tmp.s_id
where tmp.avg_score>=60;

--Mode 3:take student Table and score Table, through s_id and s_name Group, and query the student information and average score of each group whose average score is greater than or equal to 60 on the basis of grouping
--This is relatively simple and easy to think of
select
    student.s_id,
    student.s_name,
    round(avg(s.s_score),2)
from student
join score s on student.s_id = s.s_id
group by student.s_id, student.s_name
having avg(s.s_score)>=60;
  1. Query the student number, student name and average score of students with an average score of less than 60: (including those with scores and those without scores)
--Method 1: query results with scores less than 60 and no scores union all
--Tips:union all duplicate removal; union No weight removal
--There are students with scores greater than 60
select
    student.s_id,
    student.s_name,
    avg_score
from student
join
(
select
    tmp.s_id,
    tmp.avg_score
 from (
          select s_id,
                 round(avg(s_score), 2) avg_score
          from score
          group by s_id
      ) tmp
 where tmp.avg_score < 60
)t1
on student.s_id=t1.s_id
union all
(
--No results
    select
        student.s_id,
        student.s_name,
        0 avg_score
    from student
    left join score s on student.s_id = s.s_id
    where student.s_id not in
    (
        select s_id
        from score
        group by s_id
    )
);

--Mode 1
select
    student.s_id,
    student.s_name,
    tmp.avgScore from student
join
(
    select
        score.s_id,
        round(avg(score.s_score),1)as avgScore
    from score group by s_id
)as tmp
on tmp.avgScore < 60
where student.s_id=tmp.s_id
union all
select
    s2.s_id,
    s2.s_name,
    0 as avgScore
from student s2
where s2.s_id not in
(
    select
        distinct sc2.s_id
    from score sc2
);

--Mode 2: use having
select
    student.s_id,
    student.s_name,
    round(avg(s.s_score),2)
from student
join score s on student.s_id = s.s_id
group by student.s_id, student.s_name
having avg(s.s_score)<60
union all
--No results
    select
        student.s_id,
        student.s_name,
        0 avg_score
    from student
    left join score s on student.s_id = s.s_id
    where student.s_id not in
    (
        select s_id
        from score
        group by s_id
    );
  1. Query the student number, student name, total number of courses selected and the total score of all courses of all students:
--Mode 1:
--1.Through query score Query the total number of courses selected with scores and the total scores of all courses
--2.Deal with the total number of courses selected and the total score of all courses null And set it to 0
select
    student.s_id,
    student.s_name,
    case when tmp.count_course is null then 0 else tmp.count_course end,
    case when tmp.sum_score is null then 0 else tmp.sum_score end
from student
left join
(
    select
        s_id,
        count(c_id) count_course,
        sum(score.s_score) sum_score
    from score
    group by s_id
)tmp
on student.s_id=tmp.s_id;

--Mode 2: direct group by
select
    student.s_id,
    student.s_name,
    count(s.c_id) total_course,
    sum(s.s_score) total_score
from student
left join score s on student.s_id = s.s_id
group by student.s_id,student.s_name;
  1. Query the number of teachers surnamed "Li"
select
    t_name,
    count(1)
from teacher
where t_name like 'Lee%'
group by t_name;
  1. Query the information of students who have learned "Zhang San's" Teaching:
select *
from student
join score s on student.s_id = s.s_id
join course c on s.c_id = c.c_id
join teacher t on c.t_id = t.t_id and t.t_name='Zhang San';
  1. Query the information of students who have not learned "Zhang San" Teaching:
select
    student.*
from student
left join
(
    select
        s_id
    from score
    join course c on score.c_id = c.c_id
    join teacher t on c.t_id = t.t_id
)tmp
on student.s_id=tmp.s_id
where tmp.s_id is null--This idea is better. In turn, ask teacher Zhang San for what he taught;
  1. Query the information of students who have studied the course numbered "01" and also studied the course numbered "02":
--Mode 1,out of commission'01',Instead, use 1
select student.*
from student
where s_id in
(
    select
        t1.s_id
    from
    (
        select
            s_id
        from score
        where c_id=1
        group by s_id
    )t1
    join
    (
        select
            s_id
        from score
        where c_id=2
        group by s_id
    )t2
    on t1.s_id=t2.s_id
);
--Mode 2
select student.*
from student
join
(
    select
        s_id
    from score
    where c_id=1
) tmp1
on student.s_id = tmp1.s_id
join
(
    select
        s_id
    from score
    where c_id=2
)tmp2
on student.s_id = tmp2.s_id;
  1. Query the information of students who have studied the course numbered "01" but have not studied the course numbered "02"
select
    student.s_id,
    s_name,
    s_sex,
    s_birth
from student
join
(
select
    s_id
from score
where c_id='01'
)t1
on student.s_id=t1.s_id
left join
(
    select
        s_id
    from score
    where c_id='02'
)t2
on student.s_id=t2.s_id
where t2.s_id is null;
  1. Query the information of students who have not studied all courses:
select
    student.s_id,
    student.s_name,
    student.s_sex,
    student.s_birth
from student
join
(
    select
        s_id
    from score
    group by s_id
    having count(c_id)<3
) t1
on student.s_id=t1.s_id;
--Method 1: I don't know if this is OK
select
    student.*
from student
where s_id in 
(
    select
        s_id
    from score
    where c_id in 
    (
        select
            c_id
        from score
        where s_id='01'
    )t1
)t2

--Method 2: first use join
select 
    s_id,
    s_name,
    s_sex
from student
join
(
    select 
        c_id
    from score 
    where s_id='01'
)t1
join
(
    select
        s_id,
        c_id
    from score
)t2
on t1.c_id=t2.c_id and student.s_id=t2.s_id
where student.s_id is not '01'
group by s_id,s_name,s_sex;
  1. Query the information of other students who have exactly the same courses as the students of "01"
--on What's the difference between writing conditions together and writing conditions separately???
--remarks:hive I won't support it group_concat method,available concat_ws('|', collect_set(str)) realization
select
    student.*
from student
join
(
    select
        s_id,
        concat_ws('|',collect_set(c_id)) concat_cid
    from score
    group by s_id
    where s_id in not '01'
)t1
on student.s_id=t1.s_id
join
(
    select 
        concat_ws('|',collect_set(c_id)) concat_cid
    from score 
    group by s_id
    where s_id='01'
)t2
on t1.concat_cid=t2.concat_cid ;
  1. Query the names of students who have not studied any course taught by "Zhang San":
--This idea is awesome!!!
--First query the information of students who have studied Professor Zhang San's courses, and then find the students who have not studied Professor Zhang San's courses
select
    student.s_id,
    student.s_name
from student
left join
(
    select
        s_id
    from score 
    join
    (
        select 
            c_id
        from course
        join teacher
        on course.t_id=teacher.t_id
        where teacher.t_name='Zhang San'
    )t1
    on score.c_id=t1.c_id
)t2
on student.s_id=t2.s_id
where t2.s_id is null;--Pay attention here!!!

  1. Query the student number, name and average score of students who fail two or more courses:
select
    s_id,
    s_name,
   avg(s_score)
from 
(
select
    s_id,
    c_id,
    case when s_score<60 then 'fail,' else 'pass' s_score_jige
from score 
);
  1. Retrieve the student information of "01" course whose score is less than 60 in descending order:
select
    student.s_id,
    student.s_name,
    student.s_sex,
    student.s_birth,
    t1.s_score
from student
join
(
    select
        s_id,
        s_score
    from score
    where c_id='01' and s_score<60
    order by s_score desc
)t1
on student.s_id=t1.s_id;
  1. Displays the scores of all courses and average scores of all students from high to low by average score:
--Similar column to row!!
--By students id,curriculum id And grades converted to--student id Average score of mathematics, Chinese and English
--Step 1: display the average scores of all students from high to low id And GPA --Note that Wang Ju did not score in 08
--Step 2: turn the three courses into columns through the left connection
--Note: because you need to query the field of the course, you need to query it in group by This field needs to be added when
select
    score.s_id,
    t1.s_score as Chinese,
    t2.s_score as Math,
    t3.s_score as English,
    round(avg(score.s_score),2) avg_score
from score
left join
(
    select
        s_id,
        s_score
    from score
    where c_id='01'
)t1
on score.s_id=t1.s_id
left join
(
    select
        s_id,
        s_score
    from score
    where c_id='02'
)t2
on score.s_id=t2.s_id
left join
(
    select
        s_id,
        s_score
    from score
    where c_id='03'
)t3
on score.s_id=t3.s_id
group by score.s_id,t1.s_score,t2.s_score,t3.s_score
order by avg_score desc;

  1. Query the highest score, lowest score and average score of each subject:
-- The course is displayed as follows: ID,curriculum name,Highest score, lowest score, average score, pass rate, medium rate, excellent rate, excellent rate
-- Pass for>=60,Medium: 70-80,Excellent: 80-90,Excellent are:>=90
select
    c.c_id,
    c.c_name,
    max_score,
    min_score,
    avg_score,
    passRate,
    middleRate,
    goodRate,
    excellentRate
from course c
join
(
    select
        c_id,
        max(s_score) max_score,
        min(s_score) min_score,
        round(avg(s_score),2) avg_score,
        round(sum(case when s_score>=60 then 1 else 0 end)/count(c_id),2) passRate,
        round(sum(case when s_score>=70 and s_score<80 then 1 else 0 end)/count(c_id),2) middleRate,
        round(sum(case when s_score>=80 and s_score<90 then 1 else 0 end)/count(c_id),2) goodRate,
        round(sum(case when s_score>=90 then 1 else 0 end)/count(c_id),2) excellentRate
    from score
    group by c_id
)t1
on c.c_id=t1.c_id;

  1. Sort according to the scores of each subject and display the ranking
-- Windowing function
select
    score.*,
    row_number() over (order by s_score desc) Ranking
from score
where c_id='01'
union all
(
select
    score.*,
    row_number() over (order by s_score desc) Ranking
from score
where c_id='02'
)union all
(
select
    score.*,
    row_number() over (order by s_score desc) Ranking
from score
where c_id='03'
);
  1. Query the total score of students and rank them
select
    s_id,
    sum(s_score) sum_score,
    row_number() over (order by sum(s_score) desc ) rk
from score
group by s_id;
  1. Query the average scores of different courses taught by different teachers from high to low: the average scores of different courses are displayed from high to low
--Method 1
select
    course.t_id,
    course.c_id,
    t_name,
    round(avg(s_score),2) avg_score
from course
join score s on course.c_id = s.c_id
join teacher t on course.t_id = t.t_id
group by course.c_id,course.t_id,t_name
order by avg_score desc ;

--Method 2
select
    course.c_id,
    course.t_id
    t_name,
    round(avg(s_score),2)as avgscore
from course,teacher,score
where teacher.t_id=course.t_id and course.c_id=score.c_id
group by course.c_id,course.t_id,t_name
order by avgscore desc;
  1. Query the information of students from the 2nd to the 3rd in all courses and the course results:
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc )rk
    from score
    where c_id='01'
    order by s_score desc
)t1
where rk=2 or rk=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc )rk
    from score
    where c_id='02'
    order by s_score desc
)t1
where rk=2 or rk=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc )rk
    from score
    where c_id='03'
    order by s_score desc
)t1
where rk=2 or rk=3;
  1. Count the number of students in each score segment of each subject: course number, course name, [100-85], [85-70], [70-60], [0-60] and percentage
select
    course.c_id,
    course.c_name,
    first,
    firstRate,
    second,
    secondRate,
    third,
    thirdRate,
    fourth,
    fourthRate
from course
join
(
    select
        c_id,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) first,
        round(sum(case when s_score<=100 and s_score>=85 then 1 else 0 end)/sum(c_id),2) as firstRate,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) second,
        round(sum(case when s_score<=85 and s_score>=70 then 1 else 0 end)/sum(c_id),2) as secondRate,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) third,
        round(sum(case when s_score<=70 and s_score>=60 then 1 else 0 end)/sum(c_id),2) as thirdRate,
        sum(case when s_score<=100 and s_score>=85 then 1 else 0 end) fourth,
        round(sum(case when s_score<=60 and s_score>=0 then 1 else 0 end)/sum(c_id),2) as fourthRate
    from score
    group by c_id
)tmp
on course.c_id=tmp.c_id;
  1. Query the average score and ranking of students:
select
    s_id,
    round(avg(s_score) ,2) avg_score,
    row_number() over (order by avg(s_score) desc ) rk
from score
group by s_id;
  1. Query the top three records of each subject
(select
    s_id,
    c_id,
    s_score
from score
where c_id='01'
order by s_score desc
limit 3)
union all
(
    select
        s_id,
        c_id,
        s_score
    from score
    where c_id='02'
    order by s_score desc
    limit 3
)
union all
(
    select
        s_id,
        c_id,
        s_score
    from score
    where c_id='03'
    order by s_score desc
    limit 3
);
  1. Query the number of students selected for each course:
select
    c_id,
    count(s_id)
from score
group by c_id;
  1. Find out the student numbers and names of all students with only two courses
--Method 1: screening in the outermost layer
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id,
        count(c_id) count_cid
    from score
    group by s_id
)tmp
on student.s_id=tmp.s_id
where tmp.count_cid=2;
--Method 2: use having screen
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id,
        count(c_id) count_cid
    from score
    group by s_id
    having count_cid=2
)tmp
on student.s_id=tmp.s_id;
--Or be simple
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id
    from score
    group by s_id
    having count(s_score)=2
)tmp
on student.s_id=tmp.s_id;
  1. Query the number of boys and girls:
select
    sum(case when s_sex='male' then 1 else 0 end) men,
    sum(case when s_sex='female' then 1 else 0 end) women
from student;
  1. Query the student information with the word "Feng" in the name:
select
    *
from student
where s_name like '%wind%';
  1. Query the list of students with the same name and count the number of students with the same name:
select
    s1.s_id,
    s1.s_name,
    count(*) as sameStu
from student s1,test.student s2
where s1.s_name=s2.s_name and s1.s_id<>s2.s_id and s1.s_sex=s2.s_sex
group by s1.s_id,s1.s_name,s1.s_sex;
  1. To check the list of students born in 1990:
select
    *
from student
where s_birth like '1990%';
  1. Query the average score of each course, and the results are arranged in descending order according to the average score. If the average score is the same, it is arranged in ascending order according to the course number:
select
    c_id,
    round(avg(s_score),2) avg_score
from score
group by c_id
order by avg_score desc,c_id;
  1. Query the student number, name and average score of all students whose average score is greater than or equal to 85:
select
    student.s_id,
    s_name,
    avg_score
from student
join
(
    select
        s_id,
        round(avg(s_score),2) avg_score
    from score
    group by s_id
    having avg(s_score)>=85--Alias cannot be written here
)tmp
on student.s_id=tmp.s_id;
  1. Query the names and scores of students whose course name is "Mathematics" and whose score is less than 60:
--First look at several tables together, and then use where Filter conditions to avoid Cartesian product
select
    s_name,
    s_score
from student,score,course
where student.s_id=score.s_id and score.c_id=course.c_id and score.s_score<60 and c_name='mathematics';
  1. Query all students' courses and scores:
--Name Chinese math English total score
select
    s.s_id,
    s_name,
    sum(case when c_name='chinese' then s_score else 0 end) as Chinese,
    sum(case when c_name='mathematics' then s_score else 0 end) as Math,
    sum(case when c_name='English' then s_score else 0 end) as English
from score
join course c on score.c_id = c.c_id
join student s on score.s_id = s.s_id
group by s.s_id,s_name
order by s_id;
  1. Query the names, course names and scores of students with scores above 70 in any course:
--Check the language first
--behind union all
--union The weight needs to be removed, but the efficiency is low; union all No weight removal, but high efficiency
select
    s_name,
    c_name,
    tmp.s_score
from
(
    (
        select
            s_id,
            c_id,
            s_score
        from score
        where c_id='01' and s_score>70
    )
    union all
    (
        select
            s_id,
            c_id,
            s_score
        from score
        where c_id='02' and s_score>70
    )
    union all
    (
        select
            s_id,
            c_id,
            s_score
        from score
        where c_id='03' and s_score>70
    )
)tmp
left join course on course.c_id=tmp.c_id
left join student on student.s_id = tmp.s_id;
  1. Inquire about students who fail the course
select *
from score
where s_score<60;
  1. Query the student number and name of students with course number 01 and course score above 80:
select
    student.s_id,
    s_name
from student
join
(
    select
        s_id
    from score
    where c_id='01' and s_score>=80
)tmp
on student.s_id=tmp.s_id;
  1. Number of students per course:
select
    c_id,
    count(1)
from score
group by c_id;
  1. Query the information and scores of the students with the highest scores among the students taking the courses taught by "Zhang San"
select
    s_id,
    s_score
from score
join
(
    select
        course.c_id,
        course.c_name,
        course.t_id,
        t1.t_name
    from course
    join
    (
        select
            t_id,
            t_name
        from teacher
        where t_name='Zhang San'
    )t1
    on course.t_id=t1.t_id
)t2
on score.c_id=t2.c_id
order by s_score desc
limit 1;
  1. Query the student number, course number and student score of students with the same score in different courses:
select distinct--duplicate removal
    s1.s_id,
    s1.c_id,
    s1.s_score
from score s1,score s2
where s1.c_id<>s2.c_id and s1.s_score=s2.s_score;
  1. Query the top three with the best scores in each course:
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc ) rk
    from score
    where c_id='01'
)t1
where rk<=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc ) rk
    from score
    where c_id='02'
)t1
where rk<=3
union all
select
    t1.*
from
(
    select
        *,
        row_number() over (order by s_score desc ) rk
    from score
    where c_id='03'
)t1
where rk<=3;
  1. Count the number of elective students for each course (only for courses with more than 5 students): it is required to output the course number and the number of elective students. The query results are arranged in descending order by the number of students. If the number of students is the same, they are arranged in ascending order by the course number
select
    c_id,
    count(1)
from score
group by c_id
having count(1) > 5
order by count(1) desc,c_id;
  1. Retrieve the student number of at least two courses
select
    s_id,
    count(c_id)
from score
group by s_id
having count(c_id)>=2;
  1. Query the information of students who have taken all courses:
select
    s_id,
    count(c_id)
from score
group by s_id
having count(c_id)=3;
  1. Query the age of each student (one year):
    – according to the date of birth, if the current month day is less than the month day of the year of birth, the age will be reduced by one
select
    s_id,
    case
        when month(s_birth)<month(`current_date`()) or (month(s_birth)=month(`current_date`()) and day(s_birth)<day(`current_date`()))
        then (year(`current_date`())-year(s_birth)-1)
        else (year(`current_date`())-year(s_birth))
    end age
from student;

--Adjust it
select
    s_id,
    year(`current_date`())-year(s_birth) -
    (
        case
            when month(s_birth)<month(`current_date`()) then 1
            when month(s_birth)=month(`current_date`()) and day(s_birth)<day(`current_date`()) then 1
            else 0
        end
    )as age
from student;
  1. Inquire about the students whose birthday is this week
select
    *
from student
where weekofyear(`current_date`())=weekofyear(s_birth);
  1. Check the students whose birthday is next week
select
    *
from student
where weekofyear(`current_date`())+1=weekofyear(s_birth);
  1. Query students whose birthdays are this month
select
    *
from student
where month(`current_date`())=month(s_birth);
  1. Query students whose birthdays are in December
select
    *
from student
where month(s_birth)='12';

Posted by sujata_ghosh on Sat, 09 Oct 2021 01:56:40 -0700