0


电影票房之数据分析(Hive)

电影票房之数据分析(Hive)

第1关:统计2020年上映的电影中,当前总票房最高的10部电影

#进入hive

hive

#在hive中创建数据库 mydb

createdatabase mydb;

#使用数据库 mydb

use mydb;

#创建表moviecleaned并使用"/t"分割字段

createtable moviecleaned(movie_name string,boxoffice string, box_rate string, sessions string, show_count_rate string, avg_number string, attendance string, total_boxoffice string, movie_days string,current_time string,releaseDate string)row format delimited fieldsterminatedby'\t' 
stored as textfile;

#将本地清洗后的数据导入moviecleaned表中

loaddatalocal inpath '/data/workspace/myshixun/data/movies.txt'intotable moviecleaned;

#创建top10_boxoffice表,用来存放数据查询的结果

createtable top10_boxoffice(movie_name string, boxoffice float)row format delimited fieldsterminatedby'\t' stored as textfile;

#查询,并将结果导入top10_boxoffice表中

insert overwrite table top10_boxoffice 
select movie_name,max(round(total_boxoffice,1)) m 
from moviecleaned 
WHERE releaseDate like'2020%'groupby movie_name  
ORDERBY m DESClimit10;

第2关: 统计2020年国庆假期中电影票房增长最多的三部电影及其每日的票房数据

#创建boxoffice_national_day表,用来存放数据查询的结果

createtable boxoffice_national_day(movie_name string, boxoffice float,dates string)row format delimited fieldsterminatedby'\t' stored as textfile;

#查询,并将结果导入boxoffice_national_day表中

insert overwrite table boxoffice_national_day 
select movie_name,boxoffice,current_timefrom moviecleaned 
WHERE movie_name in(select t.movie_name from(select movie_name,sum(boxoffice)as n from moviecleaned WHEREcurrent_timebetween'2020-10-01'and'2020-10-07'GROUPBY movie_name orderby n descLIMIT3)as t)andcurrent_timebetween'2020-10-01'and'2020-10-07';

第3关:统计2020年中当日综合总票房最多的10天

#创建day_max_boxoffice表,用来存放数据查询的结果

createtable day_max_boxoffice(dates string, boxoffice float)row format delimited fieldsterminatedby'\t' stored as textfile;

#查询,并将结果导入day_max_boxoffice表中

insert overwrite table day_max_boxoffice 
selectcurrent_time,round(sum(boxoffice),2)as n 
from moviecleaned 
WHERE releaseDate like'2020%'groupbycurrent_timeORDERBY n DESClimit10;

第4关:统计2020年首映的电影上映后7天的电影票房信息

#创建movie_boxoffice表,用来存放数据查询的结果

createtable movie_boxoffice(movie_name string,dates string, boxoffice float)row format delimited fieldsterminatedby'\t' stored as textfile;

#查询,并将结果导入movie_boxoffice表中

insert overwrite table movie_boxoffice 
select t.movie_name,moviecleaned.current_time,boxoffice 
from moviecleaned leftjoin(select movie_name,current_timefrom moviecleaned WHERE movie_days="上映首日"and releaseDate like"2020%"GROUPBY movie_name,current_time) t 
on moviecleaned.movie_name=t.movie_name 
WHERE moviecleaned.current_timebetween t.current_timeand DATE_ADD(t.current_time,+6)ORDERBY t.movie_name,moviecleaned.current_time;

第5关:统计2020年元旦节与国庆节放假后7天的观影人数

#创建movie_boxoffice表,用来存放数据查询的结果

createtable festival_boxoffice(dates string,festival string, num int)row format delimited fieldsterminatedby'\t' stored as textfile;

#查询,并将结果导入movie_boxoffice表中

insert overwrite table festival_boxoffice 
select split(current_time,'-')[2],casewhen  t.current_timebetween'2020-10-01'and'2020-10-07'then'national_day'when  t.current_timebetween'2020-01-01'and'2020-01-07'then'new_year_day'else'other'ENDas festival ,
cast(sum(num)asbigint)from(selectcurrent_time,avg_number*sessions as num from moviecleaned 
WHEREcurrent_timebetween'2020-10-01'and'2020-10-07'orcurrent_timebetween'2020-01-01'and'2020-01-07') t 
GROUPBYcurrent_time;

本文转载自: https://blog.csdn.net/qq_52792570/article/details/130970276
版权归原作者 WA_自动机 所有, 如有侵权,请联系我们删除。

“电影票房之数据分析(Hive)”的评论:

还没有评论