Deploying a 3-shard, 2-replica ClickHouse test cluster on 3 nodes
Deployment background
In a ClickHouse deployment with 3 shards and 2 replicas, running one ClickHouse instance per node requires 6 nodes. To reduce the node count further you would have to run several instances per node; with 3 nodes, that means 2 ClickHouse instances on each node. This article presents a circular-replication (ring) scheme that deploys 3 shards and 2 replicas on 3 nodes with only a single ClickHouse instance per node: each shard is mapped to its own database, and replication between these databases provides the shard's replica.
Deployment scheme
The key points of the scheme are:
- Each shard gets its own dedicated database.
- For each shard, a different default_database is set on each node in the cluster definition.
- The Distributed table is created with an empty database argument, so the configured default_database is used for routing (see the sketch after this list).
- internal_replication is set to true for every shard.
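To make the third point concrete, here is a minimal sketch of the Distributed-engine DDL that appears in full later in the walkthrough (the AS clause simply borrows the column definitions from one of the local tables created further below; it is used here only to keep the sketch short):
-- Sketch only: the full DDL is given in the "Distributed table" step below.
-- The second argument of Distributed() (the database) is the empty string,
-- so each replica resolves 'hits_shard' against the default_database
-- configured for it in the cluster definition (test_01 / test_02 / test_03).
CREATE TABLE test.hits_distributed AS test_01.hits_shard
ENGINE = Distributed('test_cluster', '', 'hits_shard', rand());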
The resulting shard and replica layout is summarized in the planning table below.
Deployment process
Machine environment and deployment planning
$ cat /etc/hosts
10.101.241.1 ch-host01
10.101.241.2 ch-host02
10.101.241.3 ch-host03
| | Node 1 | Node 2 | Node 3 |
|---|---|---|---|
| Hostname | ch-host01 | ch-host02 | ch-host03 |
| IP address | 10.101.241.1 | 10.101.241.2 | 10.101.241.3 |
| Shards / replicas | shard 1 replica 1, shard 3 replica 2 | shard 1 replica 2, shard 2 replica 1 | shard 2 replica 2, shard 3 replica 1 |
| Databases | test, test_01, test_03 | test, test_02, test_01 | test, test_03, test_02 |
Node configuration
The cluster is named test_cluster. On every node, create the file /etc/clickhouse-server/config.d/test_cluster.xml with the following content:
<?xml version="1.0"?>
<clickhouse>
<remote_servers>
<test_cluster>
<shard>
<internal_replication>true</internal_replication>
<replica>
<default_database>test_01</default_database>
<host>ch-host01</host>
<port>9001</port>
<user>default</user>
<password>default_password</password>
</replica>
<replica>
<default_database>test_01</default_database>
<host>ch-host02</host>
<port>9001</port>
<user>default</user>
<password>default_password</password>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<default_database>test_02</default_database>
<host>ch-host02</host>
<port>9001</port>
<user>default</user>
<password>default_password</password>
</replica>
<replica>
<default_database>test_02</default_database>
<host>ch-host03</host>
<port>9001</port>
<user>default</user>
<password>default_password</password>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<default_database>test_03</default_database>
<host>ch-host03</host>
<port>9001</port>
<user>default</user>
<password>default_password</password>
</replica>
<replica>
<default_database>test_03</default_database>
<host>ch-host01</host>
<port>9001</port>
<user>default</user>
<password>default_password</password>
</replica>
</shard>
</test_cluster>
</remote_servers>
</clickhouse>
Next, define the macros on each node by creating /etc/clickhouse-server/config.d/test_macros.xml. The content differs per node:
- Node 1
<?xml version="1.0"?>
<clickhouse>
<macros>
<test_cluster>test_cluster</test_cluster>
<test_shard01>s1</test_shard01>
<test_shard02>s3</test_shard02>
<test_replica01>s1_r1</test_replica01>
<test_replica02>s3_r2</test_replica02>
</macros>
</clickhouse>
- Node 2
<?xml version="1.0"?>
<clickhouse>
<macros>
<test_cluster>test_cluster</test_cluster>
<test_shard01>s2</test_shard01>
<test_shard02>s1</test_shard02>
<test_replica01>s2_r1</test_replica01>
<test_replica02>s1_r2</test_replica02>
</macros>
</clickhouse>
- Node 3
<?xml version="1.0"?>
<clickhouse>
<macros>
<test_cluster>test_cluster</test_cluster>
<test_shard01>s3</test_shard01>
<test_shard02>s2</test_shard02>
<test_replica01>s3_r1</test_replica01>
<test_replica02>s2_r2</test_replica02>
</macros>
</clickhouse>
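To see what these macros do, consider the ReplicatedMergeTree clause used in the local-table DDL further below. On node 1, the shard-1 table in test_01 expands as follows; node 2 produces the same ZooKeeper path for its copy of shard 1 (via test_shard02 = s1) but a different replica name, which is exactly what makes the two copies replicas of one another:
-- Template used in the local-table DDL below:
--   ENGINE = ReplicatedMergeTree('/clickhouse/{test_cluster}/{test_shard01}/tables/hits', '{test_replica01}')
-- On node 1 (test_shard01 = s1, test_replica01 = s1_r1) this expands to:
--   ENGINE = ReplicatedMergeTree('/clickhouse/test_cluster/s1/tables/hits', 's1_r1')
-- On node 2, test_01.hits_shard uses {test_shard02} = s1 and {test_replica02} = s1_r2,
-- yielding the same ZooKeeper path with replica name 's1_r2'.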
Fix the ownership of these files and restart clickhouse-server on every node so the new configuration is loaded; files placed under /etc/clickhouse-server/config.d/ are merged into the main configuration automatically, so no extra include directive is required:
sudo chown -R clickhouse:clickhouse /etc/clickhouse-server/config.d
sudo systemctl restart clickhouse-server
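After the restart, a quick way to confirm that each node picked up its macros and the cluster definition (an optional check, not part of the original procedure) is to query the system tables on any node; the default_database column shows which database each replica will route to:
SELECT macro, substitution FROM system.macros;

SELECT cluster, shard_num, replica_num, host_name, port, default_database
FROM system.clusters
WHERE cluster = 'test_cluster';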
Create the databases
- Node 1
CREATE DATABASE IF NOT EXISTS test;
CREATE DATABASE IF NOT EXISTS test_01;
CREATE DATABASE IF NOT EXISTS test_03;
- Node 2
CREATE DATABASE IF NOT EXISTS test;
CREATE DATABASE IF NOT EXISTS test_02;
CREATE DATABASE IF NOT EXISTS test_01;
- Node 3
CREATE DATABASE IF NOT EXISTS test;
CREATE DATABASE IF NOT EXISTS test_03;
CREATE DATABASE IF NOT EXISTS test_02;
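If you would rather script this step than type the statements on each node, a minimal sketch using clickhouse-client (assuming it is installed on the machine you run this from, and reusing the port and password from the cluster definition) could look like this:
# Shared database on all three nodes
for host in ch-host01 ch-host02 ch-host03; do
  clickhouse-client --host "$host" --port 9001 --password default_password \
    --query "CREATE DATABASE IF NOT EXISTS test"
done
# Shard databases are node-specific, e.g. for ch-host01:
for db in test_01 test_03; do
  clickhouse-client --host ch-host01 --port 9001 --password default_password \
    --query "CREATE DATABASE IF NOT EXISTS $db"
done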
Create the tables
The tables use the hits dataset published on the ClickHouse website:
https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz
Download and decompress it:
curl https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
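As an optional sanity check, note how many rows the file contains so you can compare it with the distributed count after the import:
wc -l hits_v1.tsv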
Local tables
- Node 1
CREATE TABLE test_01.hits_shard
(
`WatchID` UInt64,
`JavaEnable` UInt8,
`Title` String,
`GoodEvent` Int16,
`EventTime` DateTime,
`EventDate` Date,
`CounterID` UInt32,
`ClientIP` UInt32,
`ClientIP6` FixedString(16),
`RegionID` UInt32,
`UserID` UInt64,
`CounterClass` Int8,
`OS` UInt8,
`UserAgent` UInt8,
`URL` String,
`Referer` String,
`URLDomain` String,
`RefererDomain` String,
`Refresh` UInt8,
`IsRobot` UInt8,
`RefererCategories` Array(UInt16),
`URLCategories` Array(UInt16),
`URLRegions` Array(UInt32),
`RefererRegions` Array(UInt32),
`ResolutionWidth` UInt16,
`ResolutionHeight` UInt16,
`ResolutionDepth` UInt8,
`FlashMajor` UInt8,
`FlashMinor` UInt8,
`FlashMinor2` String,
`NetMajor` UInt8,
`NetMinor` UInt8,
`UserAgentMajor` UInt16,
`UserAgentMinor` FixedString(2),
`CookieEnable` UInt8,
`JavascriptEnable` UInt8,
`IsMobile` UInt8,
`MobilePhone` UInt8,
`MobilePhoneModel` String,
`Params` String,
`IPNetworkID` UInt32,
`TraficSourceID` Int8,
`SearchEngineID` UInt16,
`SearchPhrase` String,
`AdvEngineID` UInt8,
`IsArtifical` UInt8,
`WindowClientWidth` UInt16,
`WindowClientHeight` UInt16,
`ClientTimeZone` Int16,
`ClientEventTime` DateTime,
`SilverlightVersion1` UInt8,
`SilverlightVersion2` UInt8,
`SilverlightVersion3` UInt32,
`SilverlightVersion4` UInt16,
`PageCharset` String,
`CodeVersion` UInt32,
`IsLink` UInt8,
`IsDownload` UInt8,
`IsNotBounce` UInt8,
`FUniqID` UInt64,
`HID` UInt32,
`IsOldCounter` UInt8,
`IsEvent` UInt8,
`IsParameter` UInt8,
`DontCountHits` UInt8,
`WithHash` UInt8,
`HitColor` FixedString(1),
`UTCEventTime` DateTime,
`Age` UInt8,
`Sex` UInt8,
`Income` UInt8,
`Interests` UInt16,
`Robotness` UInt8,
`GeneralInterests` Array(UInt16),
`RemoteIP` UInt32,
`RemoteIP6` FixedString(16),
`WindowName` Int32,
`OpenerName` Int32,
`HistoryLength` Int16,
`BrowserLanguage` FixedString(2),
`BrowserCountry` FixedString(2),
`SocialNetwork` String,
`SocialAction` String,
`HTTPError` UInt16,
`SendTiming` Int32,
`DNSTiming` Int32,
`ConnectTiming` Int32,
`ResponseStartTiming` Int32,
`ResponseEndTiming` Int32,
`FetchTiming` Int32,
`RedirectTiming` Int32,
`DOMInteractiveTiming` Int32,
`DOMContentLoadedTiming` Int32,
`DOMCompleteTiming` Int32,
`LoadEventStartTiming` Int32,
`LoadEventEndTiming` Int32,
`NSToDOMContentLoadedTiming` Int32,
`FirstPaintTiming` Int32,
`RedirectCount` Int8,
`SocialSourceNetworkID` UInt8,
`SocialSourcePage` String,
`ParamPrice` Int64,
`ParamOrderID` String,
`ParamCurrency` FixedString(3),
`ParamCurrencyID` UInt16,
`GoalsReached` Array(UInt32),
`OpenstatServiceName` String,
`OpenstatCampaignID` String,
`OpenstatAdID` String,
`OpenstatSourceID` String,
`UTMSource` String,
`UTMMedium` String,
`UTMCampaign` String,
`UTMContent` String,
`UTMTerm` String,
`FromTag` String,
`HasGCLID` UInt8,
`RefererHash` UInt64,
`URLHash` UInt64,
`CLID` UInt32,
`YCLID` UInt64,
`ShareService` String,
`ShareURL` String,
`ShareTitle` String,
`ParsedParams` Nested(
Key1 String,
Key2 String,
Key3 String,
Key4 String,
Key5 String,
ValueDouble Float64),
`IslandID` FixedString(16),
`RequestNum` UInt32,
`RequestTry` UInt8
)
ENGINE=ReplicatedMergeTree('/clickhouse/{test_cluster}/{test_shard01}/tables/hits', '{test_replica01}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);
CREATE TABLE test_03.hits_shard
-- (column list omitted; identical to the previous CREATE TABLE)
ENGINE=ReplicatedMergeTree('/clickhouse/{test_cluster}/{test_shard02}/tables/hits', '{test_replica02}')
-- (PARTITION BY / ORDER BY / SAMPLE BY omitted; identical to the previous CREATE TABLE)
- Node 2
CREATE TABLE test_02.hits_shard
-- (column list omitted; identical to the previous CREATE TABLE)
ENGINE=ReplicatedMergeTree('/clickhouse/{test_cluster}/{test_shard01}/tables/hits', '{test_replica01}')
-- (PARTITION BY / ORDER BY / SAMPLE BY omitted; identical to the previous CREATE TABLE)
CREATE TABLE test_01.hits_shard
-- (column list omitted; identical to the previous CREATE TABLE)
ENGINE=ReplicatedMergeTree('/clickhouse/{test_cluster}/{test_shard02}/tables/hits', '{test_replica02}')
-- (PARTITION BY / ORDER BY / SAMPLE BY omitted; identical to the previous CREATE TABLE)
- Node 3
CREATE TABLE test_03.hits_shard
-- (column list omitted; identical to the previous CREATE TABLE)
ENGINE=ReplicatedMergeTree('/clickhouse/{test_cluster}/{test_shard01}/tables/hits', '{test_replica01}')
-- (PARTITION BY / ORDER BY / SAMPLE BY omitted; identical to the previous CREATE TABLE)
CREATE TABLE test_02.hits_shard
-- (column list omitted; identical to the previous CREATE TABLE)
ENGINE=ReplicatedMergeTree('/clickhouse/{test_cluster}/{test_shard02}/tables/hits', '{test_replica02}')
-- (PARTITION BY / ORDER BY / SAMPLE BY omitted; identical to the previous CREATE TABLE)
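Once the local tables exist on all three nodes, you can optionally verify that every replica pair registered correctly; each row should report total_replicas = 2 and active_replicas = 2:
SELECT database, table, is_leader, total_replicas, active_replicas
FROM system.replicas
WHERE table = 'hits_shard';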
Distributed table
Run on every node:
CREATE TABLE test.hits_distributed
-- (column list omitted; identical to the previous CREATE TABLE)
ENGINE = Distributed('test_cluster', '', 'hits_shard', rand());
Testing the result
Import the data
clickhouse-client --host ch-host01 --port 9001 --password default_password --query "INSERT INTO test.hits_distributed FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv
Query the data
SELECT COUNT(*) FROM test.hits_distributed;
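The count should equal the number of rows in hits_v1.tsv. To see how the rows were spread across the three shards, you can group by the _shard_num virtual column that the Distributed engine exposes (an optional check):
SELECT _shard_num, count() AS rows
FROM test.hits_distributed
GROUP BY _shard_num
ORDER BY _shard_num;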