MySQL拉链表算法实现

 拉链表是针对数据仓库中表存储数据的方式而定义的。顾名思义,所谓拉链,就是记录历史:记录一个事物从开始一直到当前状态的所有变化信息。

数据状态的变化分类

对数据的操作

没有变化的数据

 不做任何操作

 insert的数据

开链,结束日期置为最大日期(当前有效)

 delete的数据

关链(结束日期为变化日)

 update的数据

拉链(先对旧记录关链,再对新记录开链)

一.DDL

-- Source table: holds the rows whose changes the zipper table TAG records.
-- Dropped first so the script can be re-run from a clean state.
DROP TABLE IF EXISTS `SRC`;
CREATE TABLE IF NOT EXISTS `SRC` (
    `ID`   INT(11)      DEFAULT NULL,
    `NAME` VARCHAR(255) DEFAULT NULL,
    `BAL`  INT(11)      DEFAULT NULL
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

-- Zipper (history) target table: the SRC columns plus the validity
-- interval START_DT / END_DT of each row version.
-- Dropped first so the script can be re-run from a clean state.
DROP TABLE IF EXISTS `TAG`;
CREATE TABLE IF NOT EXISTS `TAG` (
    `ID`       INT(11)      DEFAULT NULL,
    `NAME`     VARCHAR(255) DEFAULT NULL,
    `BAL`      INT(11)      DEFAULT NULL,
    `START_DT` DATE         DEFAULT NULL,
    `END_DT`   DATE         DEFAULT NULL
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;

-- Staging table for the current day's data; structure is copied from TAG.
DROP TABLE IF EXISTS VT_NEW;
CREATE TABLE IF NOT EXISTS VT_NEW LIKE TAG;

-- Staging table for the current day's new and changed rows; structure is copied from TAG.
DROP TABLE IF EXISTS VT_INC;
CREATE TABLE IF NOT EXISTS VT_INC LIKE TAG;

二.第一次初始数据入拉链表


001 源表数据插入

-- Seed the source table with its initial rows.
-- The column list is explicit so the statement does not depend on the
-- physical column order of SRC.
INSERT INTO SRC (ID, NAME, BAL)
VALUES
    (101, '王五', 100),
    (102, '赵四', 50);
+-----+------+------+
| ID  | NAME | BAL  |
+-----+------+------+
| 101 | 王五 |  100 |
| 102 | 赵四 |   50 |
+-----+------+------+


002 初始数据入目标表(第一次跑数,初始数据直接开链,结束日期置为最大日期)

-- Initial load: copy every SRC row into TAG as an open chain that starts
-- today and ends at the sentinel date '2099-12-31'.
INSERT INTO TAG (ID, NAME, BAL, START_DT, END_DT)
SELECT
    s.ID,
    s.NAME,
    s.BAL,
    CURRENT_DATE,
    '2099-12-31'
FROM SRC AS s;
+-----+------+------+------------+------------+
| ID  | NAME | BAL  | START_DT   | END_DT     |
+-----+------+------+------------+------------+
| 101 | 王五 |  100 | 2022-03-17 | 2099-12-31 |
| 102 | 赵四 |   50 | 2022-03-17 | 2099-12-31 |
+-----+------+------+------------+------------+

三.T+1增量及更新数据拉链算法

001 源表数据插入及更新

-- Simulate the next day's source changes: one update and one new row.
-- BAL is an INT column, so it is assigned a numeric literal rather than a
-- string that relies on implicit conversion.
UPDATE SRC
SET BAL = 1000
WHERE ID = 101;

-- The column list is explicit so the statement does not depend on the
-- physical column order of SRC.
INSERT INTO SRC (ID, NAME, BAL)
VALUES (103, '张三', 10);
+-----+------+------+
| ID  | NAME | BAL  |
+-----+------+------+
| 101 | 王五 | 1000 |
| 102 | 赵四 |   50 |
| 103 | 张三 |   10 |
+-----+------+------+


002 VT_NEW存放源数据,直接开链

-- Rebuild VT_NEW from scratch and load the full current SRC snapshot into
-- it, with every row stamped as an open chain.
DROP TABLE IF EXISTS VT_NEW;
CREATE TABLE IF NOT EXISTS VT_NEW LIKE TAG;

INSERT INTO VT_NEW (ID, NAME, BAL, START_DT, END_DT)
SELECT
    s.ID,
    s.NAME,
    s.BAL,
    CURRENT_DATE + INTERVAL 1 DAY AS START_DT,  -- simulates the T+1 load date
    '2099-12-31' AS END_DT
FROM SRC AS s;

+-----+------+------+------------+------------+
| ID  | NAME | BAL  | START_DT   | END_DT     |
+-----+------+------+------------+------------+
| 101 | 王五 | 1000 | 2022-03-18 | 2099-12-31 |
| 102 | 赵四 |   50 | 2022-03-18 | 2099-12-31 |
| 103 | 张三 |   10 | 2022-03-18 | 2099-12-31 |
+-----+------+------+------------+------------+

003 VT_INC存放对比出的新增及修改数据

-- Rebuild VT_INC from scratch and fill it with the VT_NEW rows that have no
-- identical (ID, NAME, BAL) counterpart among the currently open rows of
-- TAG, i.e. the new and changed rows.
DROP TABLE IF EXISTS VT_INC;
CREATE TABLE IF NOT EXISTS VT_INC LIKE TAG;

-- NOT EXISTS with the NULL-safe operator <=> replaces a row-constructor
-- NOT IN. All three columns are nullable, and with NOT IN a NULL in NAME or
-- BAL makes the predicate evaluate to NULL, which silently drops the row.
INSERT INTO VT_INC (ID, NAME, BAL, START_DT, END_DT)
SELECT
    n.ID,
    n.NAME,
    n.BAL,
    n.START_DT,
    n.END_DT
FROM VT_NEW AS n
WHERE NOT EXISTS (
    SELECT 1
    FROM TAG AS t
    WHERE t.END_DT = '2099-12-31'
      AND t.ID   <=> n.ID
      AND t.NAME <=> n.NAME
      AND t.BAL  <=> n.BAL
);

+-----+------+------+------------+------------+
| ID  | NAME | BAL  | START_DT   | END_DT     |
+-----+------+------+------------+------------+
| 101 | 王五 | 1000 | 2022-03-18 | 2099-12-31 |
| 103 | 张三 |   10 | 2022-03-18 | 2099-12-31 |
+-----+------+------+------------+------------+


004 更新目标表需要做拉链的数据,将原数据关链

-- Close the open TAG row of every ID that appears in VT_INC by replacing the
-- sentinel end date with the load date.
UPDATE TAG AS t
SET t.END_DT = CURRENT_DATE + INTERVAL 1 DAY  -- simulates the T+1 load date
WHERE t.END_DT = '2099-12-31'
  AND EXISTS (
      SELECT 1
      FROM VT_INC AS i
      WHERE i.ID = t.ID
  );
+-----+------+-----+------------+------------+
| ID  | NAME | BAL | START_DT   | END_DT     |
+-----+------+-----+------------+------------+
| 101 | 王五 | 100 | 2022-03-17 | 2022-03-18 |
| 102 | 赵四 |  50 | 2022-03-17 | 2099-12-31 |
+-----+------+-----+------------+------------+


005 修改目标表进行开链操作

-- Open a new chain in TAG for every row in VT_INC: it keeps the START_DT
-- stored in VT_INC and ends at the sentinel date '2099-12-31'.
INSERT INTO TAG (ID, NAME, BAL, START_DT, END_DT)
SELECT
    i.ID,
    i.NAME,
    i.BAL,
    i.START_DT,
    '2099-12-31'
FROM VT_INC AS i;
+-----+------+------+------------+------------+
| ID  | NAME | BAL  | START_DT   | END_DT     |
+-----+------+------+------------+------------+
| 101 | 王五 |  100 | 2022-03-17 | 2022-03-18 |
| 101 | 王五 | 1000 | 2022-03-18 | 2099-12-31 |
| 102 | 赵四 |   50 | 2022-03-17 | 2099-12-31 |
| 103 | 张三 |   10 | 2022-03-18 | 2099-12-31 |
+-----+------+------+------------+------------+


四.T+2源数据移除:拉链数据删除(关链)算法


001 源表数据移除

-- Simulate removal of a row from the source table.
DELETE FROM SRC
WHERE ID = 101;
+-----+------+-----+
| ID  | NAME | BAL |
+-----+------+-----+
| 102 | 赵四 |  50 |
| 103 | 张三 |  10 |
+-----+------+-----+


002 VT_NEW存放源数据,直接开链

-- Rebuild VT_NEW from scratch and load the full current SRC snapshot into
-- it, with every row stamped as an open chain.
DROP TABLE IF EXISTS VT_NEW;
CREATE TABLE IF NOT EXISTS VT_NEW LIKE TAG;

INSERT INTO VT_NEW (ID, NAME, BAL, START_DT, END_DT)
SELECT
    s.ID,
    s.NAME,
    s.BAL,
    CURRENT_DATE + INTERVAL 1 DAY AS START_DT,  -- simulates the T+1 load date
    '2099-12-31' AS END_DT
FROM SRC AS s;
+-----+------+-----+------------+------------+
| ID  | NAME | BAL | START_DT   | END_DT     |
+-----+------+-----+------------+------------+
| 102 | 赵四 |  50 | 2022-03-18 | 2099-12-31 |
| 103 | 张三 |  10 | 2022-03-18 | 2099-12-31 |
+-----+------+-----+------------+------------+


003 VT_INC存放对比出的需要关链的数据(目标表中当前有效、但在VT_NEW中已找不到的记录)

-- Rebuild VT_INC from scratch and fill it with the currently open TAG rows
-- that have no identical (ID, NAME, BAL) counterpart in VT_NEW, i.e. the
-- rows whose chain must be closed.
DROP TABLE IF EXISTS VT_INC;
CREATE TABLE IF NOT EXISTS VT_INC LIKE TAG;

-- NOT EXISTS with the NULL-safe operator <=> replaces a row-constructor
-- NOT IN. All three columns are nullable, and with NOT IN a NULL in any
-- compared column makes the predicate evaluate to NULL, which silently
-- drops the row.
INSERT INTO VT_INC (ID, NAME, BAL, START_DT, END_DT)
SELECT
    t.ID,
    t.NAME,
    t.BAL,
    t.START_DT,
    CURRENT_DATE + INTERVAL 2 DAY  -- simulates removal detected on T+2
FROM TAG AS t
WHERE t.END_DT = '2099-12-31'
  AND NOT EXISTS (
      SELECT 1
      FROM VT_NEW AS n
      WHERE n.ID   <=> t.ID
        AND n.NAME <=> t.NAME
        AND n.BAL  <=> t.BAL
  );

+-----+------+------+------------+------------+
| ID  | NAME | BAL  | START_DT   | END_DT     |
+-----+------+------+------------+------------+
| 101 | 王五 | 1000 | 2022-03-18 | 2022-03-19 |
+-----+------+------+------------+------------+


004 修改目标表进行关链操作

-- Close the open TAG row of every ID that appears in VT_INC by replacing the
-- sentinel end date with the removal date.
UPDATE TAG AS t
SET t.END_DT = CURRENT_DATE + INTERVAL 2 DAY  -- simulates the T+2 removal date
WHERE t.END_DT = '2099-12-31'
  AND EXISTS (
      SELECT 1
      FROM VT_INC AS i
      WHERE i.ID = t.ID
  );

+-----+------+------+------------+------------+
| ID  | NAME | BAL  | START_DT   | END_DT     |
+-----+------+------+------------+------------+
| 101 | 王五 |  100 | 2022-03-17 | 2022-03-18 |
| 101 | 王五 | 1000 | 2022-03-18 | 2022-03-19 |
| 102 | 赵四 |   50 | 2022-03-17 | 2099-12-31 |
| 103 | 张三 |   10 | 2022-03-18 | 2099-12-31 |
+-----+------+------+------------+------------+


版权声明:本文为qq_43227716原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。