0


网约车大数据综合项目——数据分析Hive

第1关:Hive 储存数据

开启Hadoop服务,并使得 Hive 连接 MySQL 初始化

start-all.sh
schematool -dbType mysql -initSchema

进入hive命令行

hive

创建数据库并连接数据库

create database trafficdata;
use trafficdata;

创建表并从本地导入数据

create table cancelorder(companyid string,address string,districtname string,orderid string,ordertime string,canceltime string,operator string,canceltypecode string,cancelreason string) row format delimited fields terminated by '|';
load data local inpath '/data/workspace/myshixun/data/canceldata.txt' into table cancelorder;
create table createorder(companyid string,address string,districtname string,orderid string,departtime string,ordertime string,departure string,deplongitude string,deplatitude string,destination string,destlongitude string,destlatitude string) row format delimited fields terminated by '\t';
load data local inpath '/data/workspace/myshixun/data/createdata.txt' into table createorder;

第2关:统计撤销订单中撤销理由最多的前 10 种理由

开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)

start-all.sh
schematool -dbType mysql -initSchema

进入hive命令行

hive

连接数据库

use trafficdata;

创建表

create table cancelreason(cancelreason string,num int) row format delimited fields terminated by '\t';

查询数据并插入表

insert into cancelreason  select cancelreason,count(*) num from cancelorder where cancelreason !='未知' group by cancelreason order by num desc limit 10;

上传表

export table cancelreason to'/user/hadoop/cancelreason';

另开命令行进入MySQL

mysql -h127.0.0.1-uroot -p123123

创建数据库并连接数据库

create database trafficdata;
use trafficdata;

创建表

create table cancelreason(
    cancelreason varchar(255),
    num int not null);

退出MySQL

exit;

下载数据到MySQL的表

sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/cancelreason/data/000000_0'--table cancelreason --fields-terminated-by '\t';

第3关:查询出成功订单最多的 10 个行政区名

开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)

start-all.sh
schematool -dbType mysql -initSchema

进入hive命令行

hive

连接数据库

use trafficdata;

创建表

create table order_district(district string,num int) row format delimited fields terminated by '\t';

查询数据并插入表

insert into order_district select districtname,count(*) num from createorder group by districtname order by num desc limit 10;

上传表

export table order_district to'/user/hadoop/order_district';

另开命令行进入MySQL

mysql -h127.0.0.1-uroot -p123123

连接数据库(无数据库需要新建数据库)

use trafficdata;

创建表

create table order_district(
    cancelreason varchar(255),
    num int not null);

退出MySQL

exit;

下载数据到MySQL的表

sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/order_district/data/000000_0'--table order_district --fields-terminated-by '\t';

第4关:查询湖南省各个市的所有订单总量

开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)

start-all.sh
schematool -dbType mysql -initSchema

进入hive命令行

hive

连接数据库

use trafficdata;

创建表

create table orderbycity(city varchar(255),num int) row format delimited fields terminated by '\t';

查询数据并插入表

INSERTINTOorderbycity(city, num)SELECT'湖南省长沙市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省长沙市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省长沙市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省株洲市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省株洲市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省株洲市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省湘潭市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省湘潭市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省湘潭市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省衡阳市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省衡阳市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省衡阳市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省邵阳市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省邵阳市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省邵阳市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省岳阳市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省岳阳市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省岳阳市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省常德市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省常德市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省常德市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省张家界市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省张家界市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省张家界市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省益阳市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省益阳市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省益阳市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省娄底市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省娄底市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省娄底市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省郴州市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省郴州市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省郴州市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省永州市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省永州市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省永州市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省怀化市'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省怀化市%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省怀化市%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;INSERTINTOorderbycity(city, num)SELECT'湖南省湘西土家族苗族自治州'AS districtname,SUM(total_num)AS total_num FROM(SELECT districtname,SUM(num)AS total_num FROM(SELECT districtname,COUNT(*)AS num FROM cancelorder WHERE districtname LIKE'湖南省湘西土家族苗族自治州%'GROUPBY districtname UNIONALLSELECT districtname,COUNT(*)AS num FROM createorder WHERE districtname LIKE'湖南省湘西土家族苗族自治州%'GROUPBY districtname)AS combined GROUPBY districtname)AS final_result;

上传表

export table orderbycity to'/user/hadoop/orderbycity';

另开命令行进入MySQL

mysql -h127.0.0.1-uroot -p123123

连接数据库(无数据库需要新建数据库)

use trafficdata;

创建表

create table orderbycity(
    city varchar(255),
    num int not null);

退出MySQL

exit;

下载数据到MySQL的表

sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_1'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_2'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_3'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_4'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_5'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_6'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_7'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_8'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_9'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_10'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_11'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_12'--table orderbycity --fields-terminated-by '\t';
sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/orderbycity/data/000000_0_copy_13'--table orderbycity --fields-terminated-by '\t';

第5关:统计湖南省当天的每分钟订单总数量

开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)

start-all.sh
schematool -dbType mysql -initSchema

进入hive命令行

hive

连接数据库

use trafficdata;

创建表

create table order_province_time(`time` string,num int) row format delimited fields terminated by '\t';

查询数据并插入表

insert into order_province_time select datetime,sum(num)as total_num from(select date_format(ordertime,'yyyy-MM-dd HH:mm')as datetime,count(ordertime)as num from cancelorder where districtname like '湖南省%' group by date_format(ordertime,'yyyy-MM-dd HH:mm') union all select date_format(departtime,'yyyy-MM-dd HH:mm')as datetime,count(ordertime)as num from createorder where districtname like '湖南省%' group by date_format(departtime,'yyyy-MM-dd HH:mm'))as combined group by datetime order by datetime;

上传表

export table order_province_time to'/user/hadoop/order_province_time';

另开命令行进入MySQL

mysql -h127.0.0.1-uroot -p123123

连接数据库(无数据库需要新建数据库)

use trafficdata;

创建表

create table order_province_time(
    times varchar(255),
    num int not null);

退出MySQL

exit;

下载数据到MySQL的表

sqoop export--connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123--export-dir '/user/hadoop/order_province_time/data/000000_0'--table order_province_time --fields-terminated-by '\t';

本文转载自: https://blog.csdn.net/CYDYYD/article/details/133848823
版权归原作者 BBQL1 所有, 如有侵权,请联系我们删除。

“网约车大数据综合项目——数据分析Hive”的评论:

还没有评论