hive表中导入数据多种方法详细说明

本文介绍的向 hive 表中导入数据的方法目录

准备数据

-- Create the demo database db_myhive_5 and switch the session to it.
create database db_myhive_5;
use db_myhive_5;

-- Create the table tb_score (s_id, c_id, score), partitioned by the string
-- column month; fields in its data files are separated by a tab character.
create table tb_score (
    s_id  string,
    c_id  string,
    score int
)
partitioned by (month string)
row format delimited
    fields terminated by '\t';

方式一：通过load方式加载数据

-- Load a file from the local filesystem into partition month='202006' of
-- tb_score; "overwrite" replaces whatever that partition already contains.
load data local inpath '/export/data/hive_data/score.txt'
overwrite into table tb_score
partition (month = '202006');

方式二：直接向分区表中插入数据

通过insert into方式加载数据

-- Create score3 with the same definition as tb_score.
create table score3 like tb_score;

-- Insert one row into partition month='202007'.
-- score is declared int in tb_score, so the value is a numeric literal
-- rather than a quoted string.
insert into table score3 partition (month = '202007')
values ('001', '002', 100);

通过查询方式加载数据

-- Create score4 with the same definition as tb_score (the only score table
-- this article creates in db_myhive_5).
create table score4 like tb_score;

-- Fill partition month='202006' of score4 from a query on tb_score.
-- "overwrite" replaces any existing data in the target partition.
-- The selected columns are the non-partition columns of tb_score.
insert overwrite table score4 partition (month = '202006')
select
    s_id,
    c_id,
    score
from tb_score;

例子

需求1: 创建新库 myhive6, 切换库 myhive6

需求2: 创建t_score_1(s_id, c_id, score) 按月指定分区 month, 指定字段分隔符为 '\t'

需求3: 通过 load data 方式加载文件中数据

需求4: 创建表 t_score_2 依据表 t_score_1的结构

需求5: 通过insert into 添加一行数据

需求6: 创建表 t_score_3 依据表 t_score_1的结构

需求7: 通过 select 添加n条记录

实现

-- Requirement 1: create the new database myhive6 and switch to it.
create database myhive6;
use myhive6;

-- Requirement 2: create t_score_1 (s_id, c_id, score), partitioned by month,
-- with '\t' as the field delimiter.
create table t_score_1 (
    s_id  string,
    c_id  string,
    score int
)
partitioned by (month string)
row format delimited
    fields terminated by '\t';

-- Requirement 3: load the data file with "load data".
-- The file is read from the local filesystem and replaces the contents of
-- partition month='202101'.
load data local inpath '/export/data/hive_data/f_score.csv'
overwrite into table t_score_1
partition (month = '202101');

-- Requirement 4: create t_score_2 based on the structure of t_score_1.
create table t_score_2 like t_score_1;

-- Requirement 5: add a single row with "insert into", into partition
-- month='202002'.
insert into t_score_2 partition (month = '202002')
values ('01', '02', 66);

-- Show the table contents to check the inserted row.
select * from t_score_2;

-- Requirement 6: create t_score_3 based on the structure of t_score_1.
create table t_score_3 like t_score_1;

-- Requirement 7: add n rows through a select.
-- The query result from t_score_1 overwrites partition month='202003'.
insert overwrite table t_score_3 partition (month = '202003')
select
    s_id,
    c_id,
    score
from t_score_1;

-- Show the table contents to check the copied rows.
select * from t_score_3;

方式三：查询语句中创建表并加载数据（as select）

将查询的结果保存到一张表当中去

-- Create score5 directly from a query result (create table ... as select).
-- The source is tb_score, the score table created earlier in this article.
create table score5 as
select *
from tb_score;

方式四：创建表时通过location指定加载数据路径

1. 创建表，并指定在hdfs上的位置

-- Create the external table score6 (s_id, c_id, s_score) with tab-separated
-- fields; its data is read from the directory given by "location".
create external table score6 (
    s_id    string,
    c_id    string,
    s_score int
)
row format delimited
    fields terminated by '\t'
location '/myscore6';

2. 上传数据到hdfs上

# Create the target directory on HDFS (-p also creates missing parents).
hdfs dfs -mkdir -p /myscore6

# Upload the local file score.txt into that directory.
hdfs dfs -put score.txt /myscore6

3. 查询数据

-- Query score6 to check that the uploaded file is visible through the table.
select * from score6;

例子

1 创建表 t_score_6(s_id, c_id, score), 指定分隔符为'\t', 指定保存位置为 '/hivedatas/t_score_6';

2 将分数信息文件上传到 hdfs的目录下 '/hivedatas/t_score_6'

3 查看表中的数据

实现

-- Step 1: create t_score_6 (s_id, c_id, score) with '\t' as the field
-- delimiter and '/hivedatas/t_score_6' as its storage location.
create table t_score_6 (
    s_id  string,
    c_id  string,
    score int
)
row format delimited
    fields terminated by '\t'
location '/hivedatas/t_score_6';

-- Step 2: upload the score file to the HDFS directory '/hivedatas/t_score_6'.
-- Run from a shell, not from Hive:
-- hdfs dfs -put <file> /hivedatas/t_score_6

-- Step 3: look at the data in the table.
select * from t_score_6;

方式五：export导出与 import导入 hive表数据（内部非分区表操作）

-- Create teacher2 with the same definition as teacher.
-- Assumes a table named teacher already exists; it is not created in this article.
create table teacher2 like teacher;

-- Export the teacher table to the directory '/export/teacher'.
export table teacher to '/export/teacher';

-- Import the exported data into teacher2.
-- The terminating semicolon is left off on purpose, following the article's own note.
-- NOTE(review): hive CLI / beeline normally expect ';' to end a statement - confirm for your client.
import table teacher2 from '/export/teacher'

注意: import 导入时结尾不要增加分号;

标签： hive hadoop 数据仓库

本文转载自: https://blog.csdn.net/nuhao/article/details/136280040
版权归原作者 nuhao_ 所有，如有侵权，请联系我们删除。