一、Map
1. 构建
语法: map (key1, value1, key2, value2, …)
说明:根据输入的key和value对构建map类型
--> 1. 一般创建方法
select map('key1_name','张三','key2_age',20) as map_col
-- 结果:
{"key1_name":"张三","key2_age":"20"}
--> 2. 根据SQL查询结果构建map
select map('k_name',name,'k_age',age) as map_col
from
(
select '张三' as name, 23 as age
union select '李四' as name, 24 as age
union select '王五' as name, 25 as age
)
-- 结果:
{"k_age":"25","k_name":"王五"}
{"k_age":"23","k_name":"张三"}
{"k_age":"24","k_name":"李四"}
2. 读取
语法: M[key]
操作类型: M为map类型,key为map中的key值
说明:返回map类型M中,key值为指定值的value值。
with base_tb as (
select map('k_name',name,'k_age',age) as map_col
from
(
select '张三' as name, 23 as age
union select '李四' as name, 24 as age
union select '王五' as name, 25 as age
)
)
select map_col['k_name'] as col from base_tb
-- 结果:
王五
张三
李四
3. 其他常用函数
--> 1.size(Map)函数:可得map的长度。返回值类型:int
select size(map_col) from base_tb;
结果:
2
2
2
--> 2.map_keys(Map)函数:可得map中所有的key; 返回值类型: array
select map_keys(map_col) from base_tb;
结果:
["k_name","k_age"]
["k_name","k_age"]
["k_name","k_age"]
--> 3.map_values(Map)函数:可得map中所有的value; 返回值类型: array
select map_values(map_col) from base_tb;
结果:
["王五","25"]
["李四","24"]
["张三","23"]
--> 4.判断map中是否包含某个key值:这种情况较为常见,多用于一些复杂逻辑判断中;
select array_contains(map_keys(map_col),'k_name') from base_tb;
结果:
true
true
true
二、Array
1.构建
语法: array(val1, val2, …)
说明:根据输入的参数构建数组array类型
其他:
collect_set()/collect_list()
在 Hive 中想实现按某字段分组,对另外字段进行合并,可通过collect_list()或者collect_set()实现。
collect_set()与collect_list()的区别:
collect_list()函数 - - 不去重
collect_set()函数 - - 去重复
--> 1. 一般创建方法
select array('张三','李四','王五') as names1, -- 直接创建
split('张三2,李四2,王五2',',') as names2 -- 通过函数创建
-- 结果:
names1 names2
["张三","李四","王五"] ["张三2","李四2","王五2"]
--> 2. 根据SQL查询结果构建array
select id,
collect_list(name) as name_list,
collect_set(name) as name_set,
collect_set(age) as age_set
from
(
select 1 as id, '张三' as name, 23 as age
union select 1 as id, '李四' as name, 24 as age
union select 1 as id, '王五' as name, 25 as age
union select 1 as id, '王五' as name, 26 as age
)
group by
id
-- 结果:
id name_list name_set age_set
1 ["李四","张三","王五","王五"] ["李四","张三","王五"] [24,23,26,25]
2.读取
语法: A[n]
操作类型: A为array类型,n为int类型
说明:返回数组A中的第n个变量值。数组的起始下标为0,越界返回NULL。
with base_tb as (
select id,
collect_list(name) as name_list,
collect_set(name) as name_set,
collect_set(age) as age_set
from
(
select 1 as id, '张三' as name, 23 as age
union select 1 as id, '李四' as name, 24 as age
union select 1 as id, '王五' as name, 25 as age
union select 1 as id, '王五' as name, 26 as age
)
group by
id
)
select name_list,name_list[2],name_list[3] from base_tb
-- 结果:
name_list _col0 _col1
["张三","王五","王五","李四"] 王五 李四
3.其他常用函数
--> 1.array_max(array):返回数组中的最大值。
select array_max(array(1,3,5)) as c1,
array_max(array('a','b','c')) as c2
+-----+-----+--+
| c1 | c2 |
+-----+-----+--+
| 5 | c |
+-----+-----+--+
--> 2.array_min(array):返回数组中的最小值。
select array_min(array(1,3,5)) as c1,
array_min(array('a','b','c')) as c2
+-----+-----+--+
| c1 | c2 |
+-----+-----+--+
| 1 | a |
+-----+-----+--+
--> 3.array_join(array, delimiter):使用指定的分隔符将数组中的元素连接成一个字符串。与concat_ws函数功能相同,##注意参数顺序##。
select array_join(array(1,3,5),'_') as c1,
concat_ws('#',array('a','b','c')) as c2
+--------+--------+--+
| c1 | c2 |
+--------+--------+--+
| 1_3_5 | a#b#c |
+--------+--------+--+
--> 4.array_sort(array):返回一个按升序排序的数组。
select array_sort(array(1,5,3)) as c1,
array_sort(array('c','b','a')) as c2
+----------+----------------+--+
| c1 | c2 |
+----------+----------------+--+
| [1,3,5] | ["a","b","c"] |
+----------+----------------+--+
--> 5.array_contains(array, value):判断数组中是否包含指定的值。包含返回true,否则返回false。
select array_contains(array(1,2,3), 2) as c1,
array_contains(array('a','b','c'), 'd') as c2
+-------+--------+--+
| c1 | c2 |
+-------+--------+--+
| true | false |
+-------+--------+--+
--> 6.explode(array):炸裂数组(列转行);
select keyid,
cc
from
(
select 1 as keyid,
array('dbc','aef','ghij','aef','ghij','aef','ghij','aef','ghij','aef','ghij') as a
) t0
lateral view explode(a) e as cc
+-------+--------+--+
keyid cc
1 dbc
1 aef
1 ghij
1 aef
1 ghij
1 aef
1 ghij
1 aef
1 ghij
1 aef
1 ghij
--> 7.array_except(array1, array2):在数组array1中去除掉所有数组array2中的元素(数组array1比数组array2多的元素)。
select array_except(array(1,2,3,2), array(2,3)) as c1,
array_except(array('a','b','c'),array('d')) as c2
+------+----------------+--+
| c1 | c2 |
+------+----------------+--+
| [1] | ["a","b","c"] |
+------+----------------+--+
--> 8.array_intersect(array1, array2):返回一个包含所有同时在数组array1和数组array2中的元素的数组(数组array1和数组array2的交集元素)。
select array_intersect(array(1,2,3,2), array(2,3)) as c1,
array_intersect(array('a','b','c'),array('d')) as c2
+--------+-----+--+
| c1 | c2 |
+--------+-----+--+
| [2,3] | [] |
+--------+-----+--+
版权归原作者 TRX1024 所有, 如有侵权,请联系我们删除。