从struct 类型的 named_struct_1字段 解析其中内容:page_id, page_name, log_date
create table tmp_user_scan_info_struct_mid stored as orc as
select
uid,
named_struct('page_id', page_id,
'page_name', page_name,
'log_date', log_date
) as named_struct_1
from tmp_user_scan_info;
select * from tmp_user_scan_info_struct_mid;
+------+---------------------------------------------------------+--+
| uid | named_struct_1 |
+------+---------------------------------------------------------+--+
| 0 | {"page_id":10,"page_name":"A","log_date":"2015-01-01"} |
| 0 | {"page_id":20,"page_name":"B","log_date":"2015-01-02"} |
| 1 | {"page_id":10,"page_name":"A","log_date":"2016-02-01"} |
| 1 | {"page_id":20,"page_name":"B","log_date":"2016-02-02"} |
+------+---------------------------------------------------------+--+
select
uid,
page_id,
page_name,
log_date
from tmp_user_scan_info_struct_mid t
lateral view outer inline(array(named_struct_1)) tmp as page_id, page_name, log_date;
select inline(array(named_struct_1)) from tmp_user_scan_info_struct_mid;
+------------+----------+-------------+--+
| page_name | page_id | log_date |
+------------+----------+-------------+--+
| A | 10 | 2015-01-01 |
| B | 20 | 2016-02-02 |
+------------+----------+-------------+--+
-------------------- 支持多列别名(多字段名)---------------
select inline( array(struct('A',10,date '2015-01-01'),
struct('B',20,date '2016-02-02')
)
) as (page_name, page_id, log_date);
+------------+----------+-------------+--+
| page_name | page_id | log_date |
+------------+----------+-------------+--+
| A | 10 | 2015-01-01 |
| B | 20 | 2016-02-02 |
+------------+----------+-------------+--+
--数据准备 ----------------------------------------------
create table tmp_user_scan_info stored as orc as
select 0 as uid, 10 as page_id, 'A' as page_name, '2015-01-01' as log_date union all
select 0 as uid, 20 as page_id, 'B' as page_name, '2015-01-02' as log_date union all
select 1 as uid, 10 as page_id, 'A' as page_name, '2016-02-01' as log_date union all
select 1 as uid, 20 as page_id, 'B' as page_name, '2016-02-02' as log_date;
select * from tmp_user_scan_info;
+------+----------+------------+-------------+--+
| uid | page_id | page_name | log_date |
+------+----------+------------+-------------+--+
| 0 | 10 | A | 2015-01-01 |
| 0 | 20 | B | 2015-01-02 |
| 1 | 10 | A | 2016-02-01 |
| 1 | 20 | B | 2016-02-02 |
+------+----------+------------+-------------+--+
----- 多行转多列 ---------
select
uid,
concat_ws(',',
collect_list(concat_ws('|', array(page_id, page_name, log_date)))
) as extend_info
from tmp_user_scan_info
group by uid;
+------+----------------------------------+--+
| uid | extend_info |
+------+----------------------------------+--+
| 0 | 10|A|2015-01-01,20|B|2015-01-02 |
| 1 | 10|A|2016-02-01,20|B|2016-02-02 |
+------+----------------------------------+--+
select
uid,
concat_ws(',', collect_set( concat_ws('|',
cast(page_id as string),
page_name,
log_date
)
)
) as pageId_vs_pageName_vs_logDate
from tmp_user_scan_info_struct_mid t
lateral view outer inline(array(named_struct_1)) tmp as page_id, page_name, log_date
group by uid;
+------+----------------------------------+--+
| uid | pageid_vs_pagename_vs_logdate |
+------+----------------------------------+--+
| 0 | 10|A|2015-01-01,20|B|2015-01-02 |
| 1 | 10|A|2016-02-01,20|B|2016-02-02 |
+------+----------------------------------+--+
版权声明:本文为JnYoung原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。