建表:
create table row2col_1(col1 string,col2 string,col3 int)
row format delimited
fields terminated by ',';
表结构如下:
+-----------------+-----------------+-----------------+--+
| row2col_1.col1 | row2col_1.col2 | row2col_1.col3 |
+-----------------+-----------------+-----------------+--+
| a | b | 1 |
| a | b | 2 |
| a | b | 3 |
| c | d | 4 |
| c | d | 5 |
| c | d | 6 |
+-----------------+-----------------+-----------------+--+
通过使用hive sql变成如下的数据格式:
+-----------------+-----------------+-----------------+--+
| a | b | 1-2-3 |
| c | d | 4-5-6 |
+-----------------+-----------------+-----------------+--+
#此时需要两个内置的函数:
a)concat_ws(参数1,参数2),用于进行字符的拼接
参数1—指定分隔符
参数2—拼接的内容
b)collect_set(col3),它的主要作用是将某字段的值进行去重汇总,产生array类型字段
如果不想去重可用collect_list()
set集合(去重收集) list集合(非去重收集)
select collect_set(col3) from row2col_1;
+----------------+--+
| _c0 |
+----------------+--+
| [1,2,3,4,5,6] |
+----------------+--+
#先进行分组 再收集
select col1,col2,collect_set(col3) from row2col_1 group by col1,col2;
+-------+-------+----------+--+
| col1 | col2 | _c2 |
+-------+-------+----------+--+
| a | b | [1,2,3] | Array[int]
| c | d | [4,5,6] |
+-------+-------+----------+--+
#针对收集的集合 把集合中的元素根据-进行拼接变成字符串 得出最终结果
select col1,col2,concat_ws('-',collect_set(col3)) from row2col_1 group by col1,col2;
#Argument type mismatch 'col3': Argument 2 of function CONCAT_WS must be "string or array<string>", but "array<int>" was found.
#把int类转换成为string类型即可,问题来了 hive中如何类型转换呢?
#hive内置了类型转换的函数cast
比如:cast(12 as double);
#最终sql
select col1,col2,concat_ws('-',collect_set(cast(col3 as string))) from row2col_1 group by col1,col2;
+-------+-------+--------+--+
| col1 | col2 | _c2 |
+-------+-------+--------+--+
| a | b | 1-2-3 |
| c | d | 4-5-6 |
+-------+-------+--------+--+