Hive常用技巧记录

https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF

一、函数部分

1. 时间函数

-- 2021-12-17 12:35:31.123,含毫秒格式，screenshottime是map字段
select 
  from_unixtime(cast(screenshottime as bigint) / 1000,'yyyy-MM-dd HH:mm:ss.sss') as screenshottime 
from (select 1612851637476 as screenshottime) a
;

在这里插入图片描述

with a as (
select '20220310' dt
union all
select '2022-03-07' dt
)
select dt
,regexp_replace(dt,'-','')
,from_unixtime(unix_timestamp(regexp_replace(dt,'-',''),'yyyymmdd'),'yyyy-mm-dd') 
,case when length(dt)=8 then from_unixtime(unix_timestamp(dt,'yyyymmdd'),'yyyy-mm-dd') 
  else dt 
 end
from a
;

在这里插入图片描述

2. 拆分函数

select
  param
 ,split(param,'&') aab
 ,concat_ws(',',split(param,'&')) 
 ,str_to_map(concat_ws(',',split(param,'&')) ,',','=') as jsonparam
 ,size(str_to_map(concat_ws(',',split(param,'&')) ,',','=')) 
from (select '?type=onlineProductPageList&tag=0&productId=166&productType=Online' param ) a

在这里插入图片描述

3. 列转行函数

select explode(split(concat_ws(',',122,343,4565),',')) ;

在这里插入图片描述

4. 行转列函数

注：本例将除uid之外的列都转到行，其中collect_set会去重，collect_list不会去重

with t as (
select 1101 uid ,-1 cityid,'F' value,1066 labelid   -- 性别
union all
select 1111 uid ,-1 cityid,'M' value,1066 labelid
union all
select 1111 uid ,-1 cityid,'55' value,1175 labelid   -- 年龄
union all
select 1101 uid ,-1 cityid,'28' value,1175 labelid
)
,t1 (
select 
   concat('"uid":','"',uid,'"') as uid
   ,explode(
             split(
                     concat_ws(','
                               ,concat('"cityid":','"',cityid,'"')
                               ,concat('"labelid":','"',labelid,'"')
                               ,concat('"value":','"',value,'"')
                              )
                    ,','
                  )
          ) col
from t
)
select
  uid,collect_set(col)
from t1
group by uid
;

在这里插入图片描述

4.1 string 类型list 现行行转列

with t as (
select '["621","32"]' col
)
select 
  col
  ,regexp_replace(col,'\\[|\\]','') col1
  ,split(regexp_replace(col,'\\[|\\]',''),',') col2
  ,explode(split(regexp_replace(col,'\\[|\\]',''),',')) col3
from t

在这里插入图片描述

5. json格式字符串作为列进行查询

json字符串：

{
    "uid":"111445478",
    "age":"8061",
    "sex":"F",
    "home":"15",
    "residence":"2",
    "regdate":"2014-01-15",
    "lastvisit":"2022-02-08",
    "pref":[
        {
            "cityid":"4",
            "labelid ":"7291",
            "value":"0.54849154"
        },
        {
            "cityid":"12",
            "labelid ":"5391",
            "value":"0.54849154"
        }
    ]
}

json sql(只要在双引号前加反斜杠“\”即可)：

with j as (
select
"
{
    \"uid\":\"111445478\",
    \"age\":\"8061\",
    \"sex\":\"F\",
    \"home\":\"15\",
    \"residence\":\"2\",
    \"regdate\":\"2014-01-15\",
    \"lastvisit\":\"2022-02-08\",
    \"pref\":[
        {
            \"cityid\":\"4\",
            \"labelid \":\"7291\",
            \"value\":\"0.54849154\"
        },
        {
            \"cityid\":\"12\",
            \"labelid \":\"5391\",
            \"value\":\"0.54849154\"
        }
    ]
}
" as js 
)
select 
  get_json_object(js, '$.uid') uid
 ,get_json_object(js, '$.age') age
 ,get_json_object(js, '$.sex') sex
 ,get_json_object(js, '$.home') home
 ,get_json_object(js, '$.residence') residence
 ,get_json_object(js, '$.regdate') regdate
 ,get_json_object(js, '$.lastvisit') lastvisit
 ,get_json_object(js, '$.pref') pref
from j

在这里插入图片描述

6.JSON数组解析

在这里插入图片描述
SQL 脚本：

with a as (
select '
[{"globalid":1,"geocategoryid":10000,"type":"base","name":"亚洲","ename":"Asia"},{"globalid":1,"geocategoryid":1,"type":"base","name":"中国","ename":"China"},{"globalid":18,"geocategoryid":2,"type":"base","name":"江西","ename":"Jiangxi"},{"globalid":268,"geocategoryid":3,"type":"base","name":"赣州","ename":"Ganzhou"},{"globalid":3971,"geocategoryid":3,"type":"base","name":"瑞金","ename":"Ruijin"}]
' as col
,1 as id
)
,b (
select id,
 explode(split(regexp_replace(regexp_replace(regexp_replace(col,' ',''), '\\[|\\]',''),'\\}\\,\\{','\\}\\|\\{'),'\\|')) tags
from a
)
,c (
select 
  b.id
  ,c.globalid,c.geocategoryid,c.type,c.name,c.ename
from b lateral view json_tuple(tags,'globalid','geocategoryid','type','name','ename') c as globalid,geocategoryid,type,name,ename
)
select 
  c.id
  ,max(case when geocategoryid=3 then globalid end) as city						-- 城市
  ,max(case when geocategoryid=3 then name end) as cityname
  ,max(case when geocategoryid=3 then ename end) as cityename
  ,max(case when geocategoryid=2 then globalid end) as province				-- 省份
  ,max(case when geocategoryid=2 then name end) as provincename
  ,max(case when geocategoryid=2 then ename end) as provinceename
  ,max(case when geocategoryid=1 then globalid end) as country					-- 国家
  ,max(case when geocategoryid=1 then name end) as  countryname
  ,max(case when geocategoryid=1 then ename end) as  countryename
  ,max(case when geocategoryid=10000 then globalid end) as continent		-- 洲际
  ,max(case when geocategoryid=10000 then name end) as continentname
  ,max(case when geocategoryid=10000 then ename end) as continentename
from c 
group by id

输出
在这里插入图片描述

7.NVL函数

NVL(expr1,expr2),如果第一个参数为NULL（空）那么显示第二个参数值，如果第一个参数的值不为null（空），则显示第一个参数值

select nvl(NULL,2) a,nvl('',2) b ;

在这里插入图片描述

8. Coalesce函数

Coalese函数与NVL函数相似，优势在于更多的选项。
Coalesce(expr1, expr2, expr3…… exprn)，所有表达式类型必须相同，或可以隐性转换为相同类型，若所有参数均为 NULL，则返回 NULL .

9. 随机函数（指定位数）

select 
 cast(ceiling(rand() * 1000000000000) as bigint) num1 
,cast(floor(rand() * 1000000000000) as bigint) num2

在这里插入图片描述

10. 复杂数据结构处理

array
map
struct

二、Hive sql 传参

set dt=2021-11-30;
with t1 as (
    select uid
    from dwhtldb.edw_usr_uf_user_ordlist
    where d='${hivevar:dt}'
    group by uid
)
,t2 (
    select uid
    from dwhtldb.edw_usr_uf_user_ordlist
    where d=date_add('${hivevar:dt}',1)
    group by uid
)
select
    count(1)
from t1 join t2 on t1.uid=t2.uid

三、Hive sql NULL与空字符的表示方法

NULL不是空值而是特殊值，不参与任何计算，
如与NULL计算，返回值仍为NULL .

with a as (
select 
       'NULL' a     -- NULL字符串，长度为4
      ,' ' b        -- 空格字符串，长度为1
      ,NULL c       -- NULL,空的显示表示法，不参与任何计算
)
select length(a),length(b),length(c)
from a

在这里插入图片描述

原文链接：https://blog.csdn.net/yipiantian/article/details/122713642