0


Hive/SparkSQL中Map、Array的基本使用和转换

一、Map

1. 构建

语法: map (key1, value1, key2, value2, …)
说明:根据输入的key和value对构建map类型

--> 1. 一般创建方法
select map('key1_name','张三','key2_age',20) as map_col
-- 结果:
{"key1_name":"张三","key2_age":"20"}

--> 2. 根据SQL查询结果构建map
select  map('k_name',name,'k_age',age) as map_col
from
(   
                select  '张三' as name, 23 as age
        union   select  '李四' as name, 24 as age
        union   select  '王五' as name, 25 as age
)
-- 结果:
{"k_age":"25","k_name":"王五"}
{"k_age":"23","k_name":"张三"}
{"k_age":"24","k_name":"李四"}

2. 读取

语法: M[key]
操作类型: M为map类型,key为map中的key值
说明:返回map类型M中,key值为指定值的value值。

with base_tb as (
    select  map('k_name',name,'k_age',age) as map_col
    from
    (   
                    select  '张三' as name, 23 as age
            union   select  '李四' as name, 24 as age
            union   select  '王五' as name, 25 as age
    )
)

select  map_col['k_name'] as col from base_tb
-- 结果:
王五
张三
李四

3. 其他常用函数

--> 1.size(Map)函数:可得map的长度。返回值类型:int
select size(map_col) from base_tb;
结果: 
2
2
2

--> 2.map_keys(Map)函数:可得map中所有的key; 返回值类型: array
select map_keys(map_col) from base_tb;
结果: 
["k_name","k_age"]
["k_name","k_age"]
["k_name","k_age"]

--> 3.map_values(Map)函数:可得map中所有的value; 返回值类型: array
select map_values(map_col) from base_tb;
结果: 
["王五","25"]
["李四","24"]
["张三","23"]

--> 4.判断map中是否包含某个key值:这种情况较为常见,多用于一些复杂逻辑判断中;
select array_contains(map_keys(map_col),'k_name') from base_tb;
结果: 
true
true
true

二、Array

1.构建

语法: array(val1, val2, …)
说明:根据输入的参数构建数组array类型

其他:

collect_set()/collect_list()

在 Hive 中想实现按某字段分组,对另外字段进行合并,可通过collect_list()或者collect_set()实现。

collect_set()与collect_list()的区别:
collect_list()函数 - - 不去重
collect_set()函数 - - 去重复

--> 1. 一般创建方法
select  array('张三','李四','王五')    as names1, -- 直接创建
        split('张三2,李四2,王五2',',') as names2  -- 通过函数创建
-- 结果:
names1                  names2
["张三","李四","王五"]    ["张三2","李四2","王五2"]

--> 2. 根据SQL查询结果构建array
select  id,
        collect_list(name) as name_list,
        collect_set(name)  as name_set,
        collect_set(age)   as age_set
from
(   
                select 1 as id, '张三' as name, 23 as age
        union   select 1 as id, '李四' as name, 24 as age
        union   select 1 as id, '王五' as name, 25 as age
        union   select 1 as id, '王五' as name, 26 as age
)
group by 
        id
-- 结果:
id  name_list                     name_set                age_set
1   ["李四","张三","王五","王五"]   ["李四","张三","王五"]    [24,23,26,25]

2.读取

语法: A[n]
操作类型: A为array类型,n为int类型
说明:返回数组A中的第n个变量值。数组的起始下标为0,越界返回NULL。

with base_tb as (
    select  id,
            collect_list(name) as name_list,
            collect_set(name)  as name_set,
            collect_set(age)   as age_set
    from
    (   
                    select 1 as id, '张三' as name, 23 as age
            union   select 1 as id, '李四' as name, 24 as age
            union   select 1 as id, '王五' as name, 25 as age
            union   select 1 as id, '王五' as name, 26 as age
    )
    group by 
            id
)

select name_list,name_list[2],name_list[3] from base_tb

-- 结果:
name_list                   _col0    _col1
["张三","王五","王五","李四"]  王五      李四

3.其他常用函数

--> 1.array_max(array):返回数组中的最大值。  
select  array_max(array(1,3,5))       as c1,
        array_max(array('a','b','c')) as c2
+-----+-----+--+
| c1  | c2  |
+-----+-----+--+
| 5   | c   |
+-----+-----+--+

--> 2.array_min(array):返回数组中的最小值。
select  array_min(array(1,3,5))       as c1,
        array_min(array('a','b','c')) as c2
+-----+-----+--+
| c1  | c2  |
+-----+-----+--+
| 1   | a   |
+-----+-----+--+

--> 3.array_join(array, delimiter):使用指定的分隔符将数组中的元素连接成一个字符串。与concat_ws函数功能相同,##注意参数顺序##。
select  array_join(array(1,3,5),'_')      as c1,
        concat_ws('#',array('a','b','c')) as c2
+--------+--------+--+
|   c1   |   c2   |
+--------+--------+--+
| 1_3_5  | a#b#c  |
+--------+--------+--+

--> 4.array_sort(array):返回一个按升序排序的数组。 
select array_sort(array(1,5,3))       as c1,
       array_sort(array('c','b','a')) as c2
+----------+----------------+--+
|    c1    |       c2       |
+----------+----------------+--+
| [1,3,5]  | ["a","b","c"]  |
+----------+----------------+--+

--> 5.array_contains(array, value):判断数组中是否包含指定的值。包含返回true,否则返回false。
select array_contains(array(1,2,3), 2)         as c1,
       array_contains(array('a','b','c'), 'd') as c2
+-------+--------+--+
|  c1   |   c2   |
+-------+--------+--+
| true  | false  |
+-------+--------+--+

--> 6.explode(array):炸裂数组(列转行);
select  keyid,
        cc
from 
(
        select  1 as keyid,
                array('dbc','aef','ghij','aef','ghij','aef','ghij','aef','ghij','aef','ghij') as a
) t0
lateral view explode(a) e as cc
+-------+--------+--+
keyid   cc
1       dbc
1       aef
1       ghij
1       aef
1       ghij
1       aef
1       ghij
1       aef
1       ghij
1       aef
1       ghij

--> 7.array_except(array1, array2):在数组array1中去除掉所有数组array2中的元素(数组array1比数组array2多的元素)。
select array_except(array(1,2,3,2),    array(2,3)) as c1,
       array_except(array('a','b','c'),array('d')) as c2
+------+----------------+--+
|  c1  |       c2       |
+------+----------------+--+
| [1]  | ["a","b","c"]  |
+------+----------------+--+

--> 8.array_intersect(array1, array2):返回一个包含所有同时在数组array1和数组array2中的元素的数组(数组array1和数组array2的交集元素)。
select array_intersect(array(1,2,3,2),    array(2,3)) as c1,
       array_intersect(array('a','b','c'),array('d')) as c2
+--------+-----+--+
|   c1   | c2  |
+--------+-----+--+
| [2,3]  | []  |
+--------+-----+--+

本文转载自: https://blog.csdn.net/weixin_43161811/article/details/135333522
版权归原作者 TRX1024 所有, 如有侵权,请联系我们删除。

“Hive/SparkSQL中Map、Array的基本使用和转换”的评论:

还没有评论