使用Datax同步mysql,oracle,sqlserver数据到Hbase

1,Datax下载使用

在这里插入图片描述

阿里巴巴开源-数据迁移工具:

下载二进制包使用

下载,并解压到linux服务器某目录:http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz

  • 目录结构
    在这里插入图片描述
  • 测试使用:参数结构(json形式)
    在这里插入图片描述

2,关系型数据库,导入到Hbase

  • RDBMs读取clob数据 (GBK编码的),导出到hbase 乱码问题:plugin-rdbms-util模块- ->reader.CommonRdbmsReader 修改相应字符编码
  • hbase11xwriter 写 hbase,默认没有开启wal, 可以手动设置此参数 (walFlag)
"writer": {
          "name": "hbase11xwriter",
          "parameter": {
            "hbaseConfig": {
              "hbase.zookeeper.quorum": "218.245.1.135:2181"
            },
            "table": "t1",
            "mode": "normal",
            	"walFlag": "true",
            "column": [.....

从DataX主页:查看’Support Data Channels’ --> Hbase1.1: 写 ,查找hbase写入参数:https://github.com/alibaba/DataX

a,配置参考

sql查询结果:
在这里插入图片描述
配置hbase写入任务参数:
在这里插入图片描述
hbase数据查询结果:
在这里插入图片描述

b,具体json参数如下

###### 查看目标服务器:hbase版本
[root@hbase-server oracle-packs]$ hbase version
Java HotSpot(TM) 64-Bit Server VM warning: Using incremental CMS is deprecated and will likely be removed in a future release
HBase 1.2.0-cdh5.12.0
Source code repository file:///data/jenkins/workspace/generic-package-rhel64-6-0/topdir/BUILD/hbase-1.2.0-cdh5.12.0 revision=Unknown
Compiled by jenkins on Thu Jun 29 04:42:07 PDT 2017
From source with checksum 6834049453a9459ccaf4cadbf9a54b2c


######从客户端机器操作DataX: 把关系型数据库(sqlserver,mysql,oracle)数据,抽取到hbase
###### 从sqlserver-> hbase:  选择hbase-writer, sqlserver-reader
[root@c7-docker bin]# cat ../conf/sqlserver_w_hbase.json
{
    "job":{
        "setting":{ "speed": { "channel":5  } },
        "content":[{
                "reader":{
                    "name":"sqlserverreader",
                    "parameter":{
                        "username":"sa",
                        "password":"XXXabc123$",
                        "connection":[
                            {
                                "jdbcUrl":[ "jdbc:sqlserver://192.168.56.101:1433"],
                                "querySql":[
                                    "SELECT t1.id, t1.name , t2.job
                                        FROM master.dbo.t1
                                        left join t2
                                        on t1.id=t2.id"
                                ]
                            }
                        ]
                    }
                },
                "writer":{
						"name":"hbase11xwriter",
						"parameter":{
							"hbaseConfig":{"hbase.zookeeper.quorum":"192.168.56.101:2181"},
							"table":"t",
							"mode":"normal",
							"rowkeyColumn":[ {"index":0,"type":"string"}   ],
							"column":[
									 {"index":1,"name":"f:name","type":"string"},  
									 {"index":2,"name":"f:job","type":"string"}
								],
							"encoding":"utf-8"
						}
			    }
		 }]
    }
}

###### 从mysql-> hbase:  选择hbase-writer, mysql-reader
[root@c7-docker bin]# cat ../conf/mysql_w_hbase.json
...
             "reader": {
                    "name":"mysqlreader",
                    "parameter":{
                        "username":"root",
                        "password":"123456",
                        "connection":[
                            {
			  	  				"jdbcUrl":[   "jdbc:mysql://192.168.56.161:3306" ],
                                "querySql":[
                                    "select id,name from test.t1 where id > 2"
                                ]  
                            }
                        ]
                    }
                },
...

###### 从oracle-> hbase:  选择hbase-writer, oracle-reader
[root@c7-docker bin]# cat ../conf/oracle_w_hbase.json
....
            "reader": {
                    "name": "oraclereader",
                    "parameter": {
                        "username": "system",
                        "password": "orcl",
                        "connection": [
                            {
			    				"jdbcUrl": [ "jdbc:oracle:thin:@192.168.56.101:1521:orcl" ],
                                "querySql": [
                                    " select id,name,\"data\" from bi.t1 where id < 1000"
                                ]     
                            }
                        ]
                    }
                },

....

c,使用shell命令简化json配置

案例1:纯sql文本

#源SQL文件:数据源
[root@c7-docker jsons]# cat c.sql
SELECT id as id2,
name as name2,age age2
FROM t1

#目的:为了产生如下格式
{
	"name":"hbase11xwriter",
	"parameter":{
		"hbaseConfig":{"hbase.zookeeper.quorum":"192.168.56.101:2181"},
		"table":"t",
		"mode":"normal",
		"rowkeyColumn":[ {"index":0,"type":"string"}   ],
		"column":[
				 {"index":1,"name":"f:name","type":"string"},  
				 {"index":2,"name":"f:job","type":"string"}
			],
		"encoding":"utf-8"
	}
}

[root@c7-docker jsons]# awk 'BEGIN{print "{ \"index\":1, \"name\":\"f:name\", \"type\":\"string\" },"}'
{ "index":1, "name":"f:name", "type":"string" },

[root@c7-docker jsons]#  cat c.sql |xargs \
 | sed -e 's@select\(.*,.*\)from .*@\1@i' \
 |sed 's@,@\n@g' \
 |awk 'NR!=1{print "{ \"index\":"NR-1", \"name\":\"f:"$NF"\", \"type\":\"string\" },"   }'
{ "index":1, "name":"f:name2", "type":"string" },
{ "index":2, "name":"f:age2", "type":"string" },

案例2:sql带注释的文本

[root@test-c6 ~]# cat a.txt
sqoop import --connect 'jdbc:sqlserver://192.168.56.10:1433;username=sa;password=123456;databasename=DB1' --query "
select
(CASE
         xxxxxx
	 xxxxxxxxx
	 xxxxxxxxx
	 ) AS ROWKEY,
c.field1 as  FIELD1     ,--备注1
c.field2 as FIELD2        ,--备注2
'01'   as   FIELD3,--备注3
'field3' as   FIELD4,--  备注4
a.field4  as FIELD5      ,--备注5
a.field5 as FIELD6 ,--备注6
field6   as FIELD7   ,--备注7
a.field7 as FIELD8       ,--备注8
a.field8 as FIELD9,--    备注9
field9 AS FIELD10,--     备注10
a.field10 AS FIELD11,--  备注11
a.field11 AS FIELD12,--        备注12
a.field12 AS FIELD13,--  备注13
a.field13 AS FIELD14,--  备注14
cast (b.field1 as nvarchar(MAX))AS FIELD15 --备注15
 from [dbo].[TABLE1] a with(nolock)
left join [dbo].[TABLE2] b with(nolock)  on a.fild1=b.fild1
left join [dbo].[TABLE3] c  with(nolock) on  c.fild2=a.fild2
left join [dbo].[TABLE4] d  with(nolock) on c.fild3 = d.fild3
where a.fild4 >=GETDATE()-3
and \$CONDITIONS " --hbase-table HBASE_TABLE --column-family f --hbase-row-key ROWKEY --verbose -m 1

[root@test-c6 ~]#  cat a.txt  \
 |awk -F'--'  '$0 !~ /sqoop/ {print $1}' \
 |xargs -d '\n' \
 |sed -e 's@.*select\(.*ROWKEY\),\(.*,.*\)from .*@ROWKEY,\2@i' \
 |sed 's@,@\n@g' \
 |awk 'NR!=1{print "{ \"index\":"NR-1", \"name\":\"f:"$NF"\", \"type\":\"string\" },"   }'

{ "index":1, "name":"f:FIELD1", "type":"string" },
{ "index":2, "name":"f:FIELD2", "type":"string" },
{ "index":3, "name":"f:FIELD3", "type":"string" },
{ "index":4, "name":"f:FIELD4", "type":"string" },
{ "index":5, "name":"f:FIELD5", "type":"string" },
{ "index":6, "name":"f:FIELD6", "type":"string" },
{ "index":7, "name":"f:FIELD7", "type":"string" },
{ "index":8, "name":"f:FIELD8", "type":"string" },
{ "index":9, "name":"f:FIELD9", "type":"string" },
{ "index":10, "name":"f:FIELD10", "type":"string" },
{ "index":11, "name":"f:FIELD11", "type":"string" },
{ "index":12, "name":"f:FIELD12", "type":"string" },
{ "index":13, "name":"f:FIELD13", "type":"string" },
{ "index":14, "name":"f:FIELD14", "type":"string" },
{ "index":15, "name":"f:FIELD15", "type":"string" },

3,hbase数据,导出到关系型数据库

在这里插入图片描述

[root@c7-docker bin]# cat jsons/hbase_w_mysql.json
{                                                                                                                                      
    "job": {                                                                                                                           
        "setting": {                                                                                                                   
            "speed": {                                                                                                                 
                "channel": 1                                                                                                           
            }                                                                                                                          
        },                                                                                                                             
        "content": [                                                                                                                   
            {                                                                                                                          
                "reader": {                                                                                                            
                    "name": "hbase11xreader",                                                                                          
                    "parameter": {                                                                                                     
                        "hbaseConfig": {                                                                                               
                            "hbase.zookeeper.quorum": "192.168.56.22:2181"                                                             
                        },                                                                                                             
                        "table": "t",                                                                                                  
                        "encoding": "utf-8",                                                                                           
                        "mode": "normal",                                                                                              
                        "column": [                                                                                                    
                            {                                                                                                          
                                "name": "rowkey",                                                                                      
                                "type": "string"                                                                                       
                            },                                                                                                         
                            {                                                                                                          
                                "name": "f: name",                                                                                     
                                "type": "string"                                                                                       
                            },                                                                                                         
                             {                                                                                                         
                                "name": "f: job",                                                                                      
                                "type": "string"                                                                                       
                            }                                                                                                          
                                                                                                                                       
                        ],                                                                                                             
                        "range": {                                                                                                     
                            "startRowkey": "",                                                                                         
                            "endRowkey": "",                                                                                           
                            "isBinaryRowkey": true                                                                                     
                        }                                                                                                              
                    }                                                                                                                  
                },                                                                                                                     
                "writer": {                                                                                                            
                    "name":"mysqlwriter",                                                                                              
                    "parameter":{                                                                                                      
                        "writeMode":"insert",                                                                                          
                        "username":"root",                                                                                             
                        "password":"123456",                                                                                           
                        "column":[                                                                                                     
                            "id",                                                                                                      
                            "name",                                                                                                    
                            "job"                                                                                                      
                        ],                                                                                                             
                        "connection":[                                                                                                 
                            {                                                                                                          
                                "jdbcUrl":"jdbc:mysql://192.168.56.161:3306/test?useUnicode=true&characterEncoding=utf8",              
                                "table":[                                                                                              
                                    "t2"                                                                                               
                                ]                                                                                                      
                            }                                                                                                          
                        ]                                                                                                              
                    }                                                                                                                  
                }                                                                                                                      
            }                                                                                                                          
        ]                                                                                                                              
    }                                                                                                                                  
}                                                                                                                                      

4,调整任务内存

[root@localhost modules]# head /opt/datax/conf/core.json
{
    "entry": {
        "jvm": "-Xms1G -Xmx1G",
        "environment": {}
    },

5,使用datax-web界面化操作datax

简介:https://github.com/WeiYe-Jing/datax-web
安装文档:https://github.com/WeiYe-Jing/datax-web/blob/master/doc/datax-web/datax-web-deploy.md
任务调度管理:xxl-job 架构 https://www.xuxueli.com/xxl-job/#%E4%BA%94%E3%80%81%E6%80%BB%E4%BD%93%E8%AE%BE%E8%AE%A1

  • datax-web查看任务运行日志: passwd被加密,有时候需要获取明文密码快速测试数据库连接( datax-core模块:Engine类中有个过滤的方法filterSensitiveConfiguration,注释调用即可,再重新打包)
1, 下载源代码
2, 版本2.1.2连接hbase报错:找到admin模块得pom.xml, 注释hbase-protocol这个依赖
3,根据需要:修改DataxJsonServiceImpl,在hbase-writer时,jdbc-writer也自动生成列名
4,修改bug: BaseQueryTool 传入sysdate()这种函数时报错
5,重新编译打包

#默认dashboard只统计7天数据,可以修改mytatis配置,使其全部统计
[yskj@localhost ~]$ grep queryLogReport\" -A 6 /opt/datax-web-2.1.2/modules/datax-admin/conf/mybatis-mapper/JobLogReportMapper.xml
        <select id="queryLogReport" resultMap="JobLogReport">
                SELECT <include refid="Base_Column_List" />
                FROM job_log_report AS t
                ORDER BY t.trigger_day ASC
        </select>
                <!-- WHERE t.trigger_day between #{triggerDayFrom} and #{triggerDayTo} -->

6,datax-web前后端分离部署

前端使用nginx启动,后端去掉js单独启动

a, datax-admin启动参数:

在这里插入图片描述

b, 查看datax-admin的jar内容

[root@c7-docker ~]# jar -tvf /opt/datax-web-2.1.2/modules/datax-admin/lib/datax-admin-2.1.2.jar  |grep static
     0 Fri Sep 25 10:19:26 CST 2020 static/
     0 Fri Sep 25 10:19:26 CST 2020 static/static/
     0 Fri Sep 25 10:19:24 CST 2020 static/static/css/
     0 Fri Sep 25 10:19:26 CST 2020 static/static/js/
     0 Fri Sep 25 10:19:26 CST 2020 static/static/img/
     0 Fri Sep 25 10:19:26 CST 2020 static/static/fonts/
  5518 Fri Sep 25 10:19:24 CST 2020 static/index.html
 11383 Fri Sep 25 10:19:24 CST 2020 static/avatar.jpg
   271 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-35691e96.f68efe2a.css
   631 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-4ba76b80.35874984.css
 17826 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-7d41a2d3.55a106d0.css
  9955 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-52071f51.9a886743.css
   888 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-7ff5eb73.adc8ba5c.css
  4748 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-556562e8.b043920c.css
   745 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-bb42af12.13821f5e.css
  2399 Fri Sep 25 10:19:24 CST 2020 static/static/css/chunk-753b0532.41eeb29d.css
251446 Fri Sep 25 10:19:24 CST 2020 static/static/css/app.b01abca2.css
....

c, 打包datax-admin的时候,把resources/static目录移除,只打包后台相关内容

d, 把resources/static放到服务器的nginx目录下

  • 1, 配置nginx
[root@c7-docker ~]# cat /etc/nginx/conf.d/datax.conf
server {
    listen 82;
    # server_name _;
    location / {
        try_files $uri / /index.html;
        alias /usr/share/nginx/html/datax-web/;
        expires 24h;
    }

    location /api/ {
        proxy_pass http://localhost:65080;
          proxy_set_header X-Real-IP $remote_addr;
          proxy_set_header Host $host;
          proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }
}
  • 2, 启动nginx, 和后台的datax-admin/executor服务
    在这里插入图片描述

版权声明:本文为eyeofeagle原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。