官网:https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/
一、基本参数
1.clickhouse-copier参数
daemon — 设置守护进程
config — 配置zookeeper文件keeper.xml路径
task-path — zk上路径(相同表名会出错)
task-file — task.xml文件路径
base-dir — 工作环境路径,默认当前路径,生成目录clickhouse-copier_日期_Pid
二、操作流程
1.新建文件夹
mkdir clickhouse-copy-data
2.创建keeper.xml
<yandex>
<!-- Logging configuration for the copier process itself -->
<logger>
<!-- trace = most verbose level; useful while debugging a copy task -->
<level>trace</level>
<!-- Rotate the log file at 100 MB and keep at most 3 rotated files -->
<size>100M</size>
<count>3</count>
<!-- Optional explicit log file paths (commented out; by default logs go to the copier's working directory)
<log>./log/log.log</log>
<errorlog>./log/log.err.log</errorlog>
<stderr>./log/stderr.log</stderr>
<stdout>./log/stdout.log</stdout>-->
</logger>
<!-- ZooKeeper ensemble used by clickhouse-copier for task coordination -->
<zookeeper>
<node index="1">
<host>10.10.3.10</host>
<port>2181</port>
</node>
<node index="2">
<host>10.10.3.11</host>
<port>2181</port>
</node>
<node index="3">
<host>10.10.3.13</host>
<port>2181</port>
</node>
</zookeeper>
</yandex>
3.创建task.xml
<clickhouse>
<!-- remote_servers: same format as the remote_servers section in /etc/clickhouse-server/config.xml -->
<remote_servers>
<!-- Source cluster (where data is pulled from) -->
<source_cluster>
<!-- Shard 1 -->
<shard>
<replica>
<host>10.10.2.25</host>
<port>9100</port>
<user>default</user>
<password>d123456</password>
</replica>
</shard>
</source_cluster>
<!-- Destination cluster (where data is pushed to) -->
<destination_cluster>
<shard>
<replica>
<host>10.10.3.110</host>
<port>9100</port>
<user>default</user>
<password>d123456</password>
</replica>
</shard>
</destination_cluster>
</remote_servers>
<!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
<max_workers>2</max_workers>
<!-- Setting used to fetch (pull) data from source cluster tables -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- Setting used to insert (push) data to destination cluster tables -->
<settings_push>
<readonly>0</readonly>
</settings_push>
<!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
<settings>
<connect_timeout>3</connect_timeout>
<!-- Sync insert is set forcibly, leave it here just in case. -->
<insert_distributed_sync>1</insert_distributed_sync>
</settings>
<!-- Copying tasks description.
You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
sequentially.
-->
<tables>
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source table: cluster, database and table to pull from -->
<cluster_pull>source_cluster</cluster_pull>
<database_pull>test</database_pull>
<table_pull>hits</table_pull>
<!-- Destination table: cluster, database and table to push into -->
<cluster_push>destination_cluster</cluster_push>
<database_push>test</database_push>
<table_push>hits2</table_push>
<!-- If the destination table does not exist, it is created from the engine definition below
(`create table if not exists`); only MergeTree-family engines are supported -->
<engine>
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
PARTITION BY toMonday(date)
ORDER BY (CounterID, EventDate)
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<sharding_key>rand()</sharding_key>
<!-- Optional expression to filter rows while pulling from the source (WHERE condition) -->
<where_condition>CounterID != 0</where_condition>
<!-- Optional list of partitions to copy (incremental import); omit this element to copy all partitions (full import) -->
<enabled_partitions>
<partition>'2018-02-26'</partition>
<partition>'2018-03-05'</partition>
</enabled_partitions>
</table_hits>
<!-- Next table task; table tasks are executed sequentially -->
<table_visits>
...
</table_visits>
</tables>
</clickhouse>
4.执行命令
clickhouse-copier --config keeper.xml --task-path /clickhouse/copytasks/task1/test --task-file=/home/root/clickhouse-copy-data/task.xml
版权声明:本文为m0_45993982原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。