0


ClickHouse 集群部署(不需要 Zookeeper)

    ClickHouse 可以在任何具有x86_64、AArch64 或 PowerPC64LE CPU 架构的 Linux,FreeBSD 或 Mac OS X 上运行。官方预构建的二进制文件通常针对 x86_64 进行编译,并利用 SSE 4.2 指令集,因此,除非另有说明,支持它的 CPU 使用将成为额外的系统需求。下面是检查当前 CPU 是否支持 SSE 4.2 的命令:
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"

一、单节点设置

1. 下载

# 查看版本
https://github.com/ClickHouse/ClickHouse/releases

# 最新稳定版本安装包下载地址
https://github.com/ClickHouse/ClickHouse/releases/download/v24.1.8.22-stable/clickhouse-client-24.1.8.22.x86_64.rpm
https://github.com/ClickHouse/ClickHouse/releases/download/v24.1.8.22-stable/clickhouse-common-static-24.1.8.22.x86_64.rpm
https://github.com/ClickHouse/ClickHouse/releases/download/v24.1.8.22-stable/clickhouse-server-24.1.8.22.x86_64.rpm

2. 安装

# 首先安装通用依赖包
rpm -ivh clickhouse-common-static-24.1.8.22.x86_64.rpm
# 安装服务器,遇到 Enter password for default user 提示时输入密码
rpm -ivh clickhouse-server-24.1.8.22.x86_64.rpm
# 安装命令行客户端
rpm -ivh clickhouse-client-24.1.8.22.x86_64.rpm

3. 启动

sudo service clickhouse-server start
    查看启动后进程:
[root@vvml-yz-hbase-test~]#ps -ef | grep clickhouse | grep -v grep
clickho+  5322     1  0 08:49 ?        00:00:00 clickhouse-watchdog        --config-file /etc/clickhouse-server/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid --daemon
clickho+  5323  5322  6 08:49 ?        00:00:00 /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid --daemon
[root@vvml-yz-hbase-test~]#
    查看监听端口
[root@vvml-yz-hbase-test~]#netstat -antpl | grep clickhouse
tcp        0      0 127.0.0.1:9004          0.0.0.0:*               LISTEN      5323/clickhouse-ser 
tcp        0      0 127.0.0.1:9005          0.0.0.0:*               LISTEN      5323/clickhouse-ser 
tcp        0      0 127.0.0.1:9009          0.0.0.0:*               LISTEN      5323/clickhouse-ser 
tcp        0      0 127.0.0.1:8123          0.0.0.0:*               LISTEN      5323/clickhouse-ser 
tcp        0      0 127.0.0.1:9000          0.0.0.0:*               LISTEN      5323/clickhouse-ser 
tcp6       0      0 ::1:9004                :::*                    LISTEN      5323/clickhouse-ser 
tcp6       0      0 ::1:9005                :::*                    LISTEN      5323/clickhouse-ser 
tcp6       0      0 ::1:9009                :::*                    LISTEN      5323/clickhouse-ser 
tcp6       0      0 ::1:8123                :::*                    LISTEN      5323/clickhouse-ser 
tcp6       0      0 ::1:9000                :::*                    LISTEN      5323/clickhouse-ser 
[root@vvml-yz-hbase-test~]#
    服务端日志的默认位置是 /var/log/clickhouse-server/。当服务端在日志中记录 Ready for connections 消息,即表示服务端已准备好处理客户端连接。一旦 clickhouse-server 启动并运行,可以利用 clickhouse-client 连接到服务端,并运行一些测试查询。

4. 验证

[root@vvml-yz-hbase-test~]#clickhouse-client --password="123456" --query "select version();"
24.1.8.22
[root@vvml-yz-hbase-test~]#clickhouse-client --password="123456" -n --query "select 1;select 2;"
1
2
[root@vvml-yz-hbase-test~]#

二、导入示例数据集

1. 下载并提取表数据

curl https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
curl https://datasets.clickhouse.com/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv

2. 创建库表

    创建 sample.sql 文件,内容如下(注意嵌套表定义):
create database if not exists tutorial;

CREATE TABLE tutorial.hits_v1
(
    `WatchID` UInt64,
    `JavaEnable` UInt8,
    `Title` String,
    `GoodEvent` Int16,
    `EventTime` DateTime,
    `EventDate` Date,
    `CounterID` UInt32,
    `ClientIP` UInt32,
    `ClientIP6` FixedString(16),
    `RegionID` UInt32,
    `UserID` UInt64,
    `CounterClass` Int8,
    `OS` UInt8,
    `UserAgent` UInt8,
    `URL` String,
    `Referer` String,
    `URLDomain` String,
    `RefererDomain` String,
    `Refresh` UInt8,
    `IsRobot` UInt8,
    `RefererCategories` Array(UInt16),
    `URLCategories` Array(UInt16),
    `URLRegions` Array(UInt32),
    `RefererRegions` Array(UInt32),
    `ResolutionWidth` UInt16,
    `ResolutionHeight` UInt16,
    `ResolutionDepth` UInt8,
    `FlashMajor` UInt8,
    `FlashMinor` UInt8,
    `FlashMinor2` String,
    `NetMajor` UInt8,
    `NetMinor` UInt8,
    `UserAgentMajor` UInt16,
    `UserAgentMinor` FixedString(2),
    `CookieEnable` UInt8,
    `JavascriptEnable` UInt8,
    `IsMobile` UInt8,
    `MobilePhone` UInt8,
    `MobilePhoneModel` String,
    `Params` String,
    `IPNetworkID` UInt32,
    `TraficSourceID` Int8,
    `SearchEngineID` UInt16,
    `SearchPhrase` String,
    `AdvEngineID` UInt8,
    `IsArtifical` UInt8,
    `WindowClientWidth` UInt16,
    `WindowClientHeight` UInt16,
    `ClientTimeZone` Int16,
    `ClientEventTime` DateTime,
    `SilverlightVersion1` UInt8,
    `SilverlightVersion2` UInt8,
    `SilverlightVersion3` UInt32,
    `SilverlightVersion4` UInt16,
    `PageCharset` String,
    `CodeVersion` UInt32,
    `IsLink` UInt8,
    `IsDownload` UInt8,
    `IsNotBounce` UInt8,
    `FUniqID` UInt64,
    `HID` UInt32,
    `IsOldCounter` UInt8,
    `IsEvent` UInt8,
    `IsParameter` UInt8,
    `DontCountHits` UInt8,
    `WithHash` UInt8,
    `HitColor` FixedString(1),
    `UTCEventTime` DateTime,
    `Age` UInt8,
    `Sex` UInt8,
    `Income` UInt8,
    `Interests` UInt16,
    `Robotness` UInt8,
    `GeneralInterests` Array(UInt16),
    `RemoteIP` UInt32,
    `RemoteIP6` FixedString(16),
    `WindowName` Int32,
    `OpenerName` Int32,
    `HistoryLength` Int16,
    `BrowserLanguage` FixedString(2),
    `BrowserCountry` FixedString(2),
    `SocialNetwork` String,
    `SocialAction` String,
    `HTTPError` UInt16,
    `SendTiming` Int32,
    `DNSTiming` Int32,
    `ConnectTiming` Int32,
    `ResponseStartTiming` Int32,
    `ResponseEndTiming` Int32,
    `FetchTiming` Int32,
    `RedirectTiming` Int32,
    `DOMInteractiveTiming` Int32,
    `DOMContentLoadedTiming` Int32,
    `DOMCompleteTiming` Int32,
    `LoadEventStartTiming` Int32,
    `LoadEventEndTiming` Int32,
    `NSToDOMContentLoadedTiming` Int32,
    `FirstPaintTiming` Int32,
    `RedirectCount` Int8,
    `SocialSourceNetworkID` UInt8,
    `SocialSourcePage` String,
    `ParamPrice` Int64,
    `ParamOrderID` String,
    `ParamCurrency` FixedString(3),
    `ParamCurrencyID` UInt16,
    `GoalsReached` Array(UInt32),
    `OpenstatServiceName` String,
    `OpenstatCampaignID` String,
    `OpenstatAdID` String,
    `OpenstatSourceID` String,
    `UTMSource` String,
    `UTMMedium` String,
    `UTMCampaign` String,
    `UTMContent` String,
    `UTMTerm` String,
    `FromTag` String,
    `HasGCLID` UInt8,
    `RefererHash` UInt64,
    `URLHash` UInt64,
    `CLID` UInt32,
    `YCLID` UInt64,
    `ShareService` String,
    `ShareURL` String,
    `ShareTitle` String,
    `ParsedParams` Nested(
        Key1 String,
        Key2 String,
        Key3 String,
        Key4 String,
        Key5 String,
        ValueDouble Float64),
    `IslandID` FixedString(16),
    `RequestNum` UInt32,
    `RequestTry` UInt8
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

CREATE TABLE tutorial.visits_v1
(
    `CounterID` UInt32,
    `StartDate` Date,
    `Sign` Int8,
    `IsNew` UInt8,
    `VisitID` UInt64,
    `UserID` UInt64,
    `StartTime` DateTime,
    `Duration` UInt32,
    `UTCStartTime` DateTime,
    `PageViews` Int32,
    `Hits` Int32,
    `IsBounce` UInt8,
    `Referer` String,
    `StartURL` String,
    `RefererDomain` String,
    `StartURLDomain` String,
    `EndURL` String,
    `LinkURL` String,
    `IsDownload` UInt8,
    `TraficSourceID` Int8,
    `SearchEngineID` UInt16,
    `SearchPhrase` String,
    `AdvEngineID` UInt8,
    `PlaceID` Int32,
    `RefererCategories` Array(UInt16),
    `URLCategories` Array(UInt16),
    `URLRegions` Array(UInt32),
    `RefererRegions` Array(UInt32),
    `IsYandex` UInt8,
    `GoalReachesDepth` Int32,
    `GoalReachesURL` Int32,
    `GoalReachesAny` Int32,
    `SocialSourceNetworkID` UInt8,
    `SocialSourcePage` String,
    `MobilePhoneModel` String,
    `ClientEventTime` DateTime,
    `RegionID` UInt32,
    `ClientIP` UInt32,
    `ClientIP6` FixedString(16),
    `RemoteIP` UInt32,
    `RemoteIP6` FixedString(16),
    `IPNetworkID` UInt32,
    `SilverlightVersion3` UInt32,
    `CodeVersion` UInt32,
    `ResolutionWidth` UInt16,
    `ResolutionHeight` UInt16,
    `UserAgentMajor` UInt16,
    `UserAgentMinor` UInt16,
    `WindowClientWidth` UInt16,
    `WindowClientHeight` UInt16,
    `SilverlightVersion2` UInt8,
    `SilverlightVersion4` UInt16,
    `FlashVersion3` UInt16,
    `FlashVersion4` UInt16,
    `ClientTimeZone` Int16,
    `OS` UInt8,
    `UserAgent` UInt8,
    `ResolutionDepth` UInt8,
    `FlashMajor` UInt8,
    `FlashMinor` UInt8,
    `NetMajor` UInt8,
    `NetMinor` UInt8,
    `MobilePhone` UInt8,
    `SilverlightVersion1` UInt8,
    `Age` UInt8,
    `Sex` UInt8,
    `Income` UInt8,
    `JavaEnable` UInt8,
    `CookieEnable` UInt8,
    `JavascriptEnable` UInt8,
    `IsMobile` UInt8,
    `BrowserLanguage` UInt16,
    `BrowserCountry` UInt16,
    `Interests` UInt16,
    `Robotness` UInt8,
    `GeneralInterests` Array(UInt16),
    `Params` Array(String),
    `Goals` Nested(
        ID UInt32,
        Serial UInt32,
        EventTime DateTime,
        Price Int64,
        OrderID String,
        CurrencyID UInt32),
    `WatchIDs` Array(UInt64),
    `ParamSumPrice` Int64,
    `ParamCurrency` FixedString(3),
    `ParamCurrencyID` UInt16,
    `ClickLogID` UInt64,
    `ClickEventID` Int32,
    `ClickGoodEvent` Int32,
    `ClickEventTime` DateTime,
    `ClickPriorityID` Int32,
    `ClickPhraseID` Int32,
    `ClickPageID` Int32,
    `ClickPlaceID` Int32,
    `ClickTypeID` Int32,
    `ClickResourceID` Int32,
    `ClickCost` UInt32,
    `ClickClientIP` UInt32,
    `ClickDomainID` UInt32,
    `ClickURL` String,
    `ClickAttempt` UInt8,
    `ClickOrderID` UInt32,
    `ClickBannerID` UInt32,
    `ClickMarketCategoryID` UInt32,
    `ClickMarketPP` UInt32,
    `ClickMarketCategoryName` String,
    `ClickMarketPPName` String,
    `ClickAWAPSCampaignName` String,
    `ClickPageName` String,
    `ClickTargetType` UInt16,
    `ClickTargetPhraseID` UInt64,
    `ClickContextType` UInt8,
    `ClickSelectType` Int8,
    `ClickOptions` String,
    `ClickGroupBannerID` Int32,
    `OpenstatServiceName` String,
    `OpenstatCampaignID` String,
    `OpenstatAdID` String,
    `OpenstatSourceID` String,
    `UTMSource` String,
    `UTMMedium` String,
    `UTMCampaign` String,
    `UTMContent` String,
    `UTMTerm` String,
    `FromTag` String,
    `HasGCLID` UInt8,
    `FirstVisit` DateTime,
    `PredLastVisit` Date,
    `LastVisit` Date,
    `TotalVisits` UInt32,
    `TraficSource` Nested(
        ID Int8,
        SearchEngineID UInt16,
        AdvEngineID UInt8,
        PlaceID UInt16,
        SocialSourceNetworkID UInt8,
        Domain String,
        SearchPhrase String,
        SocialSourcePage String),
    `Attendance` FixedString(16),
    `CLID` UInt32,
    `YCLID` UInt64,
    `NormalizedRefererHash` UInt64,
    `SearchPhraseHash` UInt64,
    `RefererDomainHash` UInt64,
    `NormalizedStartURLHash` UInt64,
    `StartURLDomainHash` UInt64,
    `NormalizedEndURLHash` UInt64,
    `TopLevelDomain` UInt64,
    `URLScheme` UInt64,
    `OpenstatServiceNameHash` UInt64,
    `OpenstatCampaignIDHash` UInt64,
    `OpenstatAdIDHash` UInt64,
    `OpenstatSourceIDHash` UInt64,
    `UTMSourceHash` UInt64,
    `UTMMediumHash` UInt64,
    `UTMCampaignHash` UInt64,
    `UTMContentHash` UInt64,
    `UTMTermHash` UInt64,
    `FromHash` UInt64,
    `WebVisorEnabled` UInt8,
    `WebVisorActivity` UInt32,
    `ParsedParams` Nested(
        Key1 String,
        Key2 String,
        Key3 String,
        Key4 String,
        Key5 String,
        ValueDouble Float64),
    `Market` Nested(
        Type UInt8,
        GoalID UInt32,
        OrderID String,
        OrderPrice Int64,
        PP UInt32,
        DirectPlaceID UInt32,
        DirectOrderID UInt32,
        DirectBannerID UInt32,
        GoodID String,
        GoodName String,
        GoodQuantity Int32,
        GoodPrice Int64),
    `IslandID` FixedString(16)
)
ENGINE = CollapsingMergeTree(Sign)
PARTITION BY toYYYYMM(StartDate)
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID);
    执行 sample.sql 文件:
clickhouse-client --password="123456" --queries-file sample.sql

3. 导入数据

clickhouse-client --password="123456" --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv
clickhouse-client --password="123456" --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
    找出可用的设置、含义及其默认值的最简单方法是查询 system.settings 表:
vvml-yz-hbase-test.172.18.4.126 :) select name, value, changed, description
  from system.settings
 where name like '%max_insert_b%';

SELECT
    name,
    value,
    changed,
    description
FROM system.settings
WHERE name LIKE '%max_insert_b%'

Query id: 05bc6241-2d1f-432e-87b3-f35a3ad612c8

┌─name──────────────────┬─value───┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────┐
│ max_insert_block_size │ 1048449 │       0 │ The maximum block size for insertion, if we control the creation of blocks for insertion. │
└───────────────────────┴─────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────┘

1 row in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.126 :) 

4. 优化表

    可以 OPTIMIZE 导入后的表。使用 MergeTree-family 引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。这些查询强制表引擎立即进行存储优化(较慢,谨慎手工执行):
clickhouse-client --password="123456" --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
clickhouse-client --password="123456" --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL"

5. 查询示例

vvml-yz-hbase-test.172.18.4.126 :) SELECT
    StartURL AS URL,
    AVG(Duration) AS AvgDuration
FROM tutorial.visits_v1
WHERE StartDate BETWEEN '2014-03-23' AND '2014-03-30'
GROUP BY URL
ORDER BY AvgDuration DESC
LIMIT 10;

SELECT
    StartURL AS URL,
    AVG(Duration) AS AvgDuration
FROM tutorial.visits_v1
WHERE (StartDate >= '2014-03-23') AND (StartDate <= '2014-03-30')
GROUP BY URL
ORDER BY AvgDuration DESC
LIMIT 10

Query id: fc2e8638-5081-496a-964b-a679eab63af9

┌─URL─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─AvgDuration─┐
│ http://itpalanija-pri-patrivative=0&ads_app_user                                                                                                                                                │       60127 │
│ http://renaul-myd-ukraine                                                                                                                                                                       │       58938 │
│ http://karta/Futbol/dynamo.kiev.ua/kawaica.su/648                                                                                                                                               │       56538 │
│ https://moda/vyikroforum1/top.ru/moscow/delo-product/trend_sms/multitryaset/news/2014/03/201000                                                                                                 │       55218 │
│ http://e.mail=on&default?abid=2061&scd=yes&option?r=city_inter.com/menu&site-zaferio.ru/c/m.ensor.net/ru/login=false&orderStage.php?Brandidatamalystyle/20Mar2014%2F007%2F94dc8d2e06e56ed56bbdd │       51378 │
│ http://karta/Futbol/dynas.com/haberler.ru/messages.yandsearchives/494503_lte_13800200319                                                                                                        │       49078 │
│ http://xmusic/vstreatings of speeds                                                                                                                                                             │       36925 │
│ http://news.ru/yandex.ru/api.php&api=http://toberria.ru/aphorizana                                                                                                                              │       36902 │
│ http://bashmelnykh-metode.net/video/#!/video/emberkas.ru/detskij-yazi.com/iframe/default.aspx?id=760928&noreask=1&source                                                                        │       34323 │
│ http://censonhaber/547-popalientLog=0&strizhki-petro%3D&comeback=search?lr=213&text                                                                                                             │       31773 │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────┘

10 rows in set. Elapsed: 0.080 sec. Processed 1.43 million rows, 112.12 MB (17.91 million rows/s., 1.40 GB/s.)
Peak memory usage: 45.07 MiB.

vvml-yz-hbase-test.172.18.4.126 :) SELECT
    sum(Sign) AS visits,
    sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
    (100. * goal_visits) / visits AS goal_percent
FROM tutorial.visits_v1
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru');

SELECT
    sum(Sign) AS visits,
    sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
    (100. * goal_visits) / visits AS goal_percent
FROM tutorial.visits_v1
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')

Query id: 02df86cc-ef97-4cec-9892-6ba92dc87d2c

┌─visits─┬─goal_visits─┬──────goal_percent─┐
│  10543 │        8553 │ 81.12491700654462 │
└────────┴─────────────┴───────────────────┘

1 row in set. Elapsed: 0.014 sec. Processed 19.72 thousand rows, 3.45 MB (1.46 million rows/s., 255.24 MB/s.)
Peak memory usage: 5.01 MiB.

vvml-yz-hbase-test.172.18.4.126 :) 

三、集群部署

    本次部署使用四台主机,构建两个分片,每个分片两个副本的 ClickHouse 集群,IP 和主机名如下:
172.18.4.126    node1
172.18.4.188    node2
172.18.4.71    node3
172.18.4.86    node4
    注意,ClickHouse 要求每个分片的每个副本必须配置在单独的实例上,也就是说在整个集群范围内,一共有多少个副本,就需要创建多少个 ClickHouse 实例。最佳实践是3分片2副本6实例。

    ClickHouse 推荐使用 ClickHouse Keeper 替代 Zookeeper(https://clickhouse.com/docs/knowledgebase/why_recommend_clickhouse_keeper_over_zookeeper)。
     下面使用 ClickHouse Keeper 配置 ClickHouse 集群。具体操作步骤参考 ClickHouse Keeper 用户指南(https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#clickhouse-keeper-user-guide)。

    ClickHouse 服务器中捆绑了 clickhouse-keeper。如果已经安装服务器,则无法单独安装 clickhouse-keeper,会收到冲突错误:
[root@vvml-yz-hbase-test~]#rpm -ivh clickhouse-keeper-24.1.8.22.x86_64.rpm 
error: Failed dependencies:
    clickhouse-server conflicts with clickhouse-keeper-0:24.1.8.22-1.x86_64
[root@vvml-yz-hbase-test~]#
    但如果有仅用作 clickhouse-keeper 的服务器,则仅可以单独安装 clickhouse-keeper。
# 查看版本
https://github.com/ClickHouse/ClickHouse/releases

# 最新稳定版本安装包下载地址
https://github.com/ClickHouse/ClickHouse/releases/download/v24.1.8.22-stable/clickhouse-keeper-24.1.8.22.x86_64.rpm
    本次部署在每个主机安装 clickhouse-server,而不单独安装 clickhouse-keeper。分片、副本、keeper规划如下:
node1    分片1副本1    keeper
node2    分片1副本2    keeper
node3    分片2副本1    keeper
node4    分片2副本2
    四台主机都作为数据分片,每个数据分片有两个副本。keeper 部署到三个实例上,奇数实例用于实现 ClickHouse Keeper 中要求的票选数。

0. 安装前准备

    在配置 ClickHouse 集群前,需要在全部四台机器上完成以下准备工作:
  • 启动 NTP 时钟同步
  • /etc/hosts 文件中添加构成集群的所有主机名
  • 配置所有主机间 ssh 免密
  • 修改用户可打开文件数与进程数
  • 禁用防火墙
  • 禁用 transparent hugepage

1. 安装配置 ClickHouse Keeper

    在 node1、node2、node3 三台主机上执行下面的操作步骤。

(1)安装 ClickHouse Server 和 ClickHouse Client

rpm -ivh clickhouse-common-static-24.1.8.22.x86_64.rpm
rpm -ivh clickhouse-server-24.1.8.22.x86_64.rpm
rpm -ivh clickhouse-client-24.1.8.22.x86_64.rpm

(2)修改 ClickHouse 主配置文件

# 修改前先备份
cp /etc/clickhouse-server/config.xml /etc/clickhouse-server/config.xml.bak
    修改 /etc/clickhouse-server/config.xml 主配置文件,在根节点 <clickhouse> 下添加以下内容:
<!-- 添加以下条目以允许通过网络接口进行外部通信。 -->
<listen_host>0.0.0.0</listen_host>

<!-- 指定实例启用 ClickHouse Keeper。更新每台服务器的<server_id>设置,node1为1、node2为2、node3为3。-->
<keeper_server>
    <tcp_port>9181</tcp_port>
    <server_id>1</server_id>
    <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
    <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>

    <coordination_settings>
        <operation_timeout_ms>10000</operation_timeout_ms>
        <session_timeout_ms>30000</session_timeout_ms>
        <raft_logs_level>warning</raft_logs_level>
    </coordination_settings>

    <raft_configuration>
        <server>
            <id>1</id>
            <hostname>node1</hostname>
            <port>9234</port>
        </server>
        <server>
            <id>2</id>
            <hostname>node2</hostname>
            <port>9234</port>
        </server>
        <server>
            <id>3</id>
            <hostname>node3</hostname>
            <port>9234</port>
        </server>
    </raft_configuration>
</keeper_server>

<!-- 指定实例所使用的 ClickHouse Keeper -->
<zookeeper>
    <node>
        <host>node1</host>
        <port>9181</port>
    </node>
    <node>
        <host>node2</host>
        <port>9181</port>
    </node>
    <node>
        <host>node3</host>
        <port>9181</port>
    </node>
</zookeeper>

(3)重启 ClickHouse

sudo service clickhouse-server restart

(4)验证 Keeper 实例是否正在运行

    在 node1、node2、node3 上执行下面的命令,如果 Keeper 运行正常,ruok 命令将返回 imok:
[root@vvml-yz-hbase-test~]#echo ruok | nc localhost 9181; echo
imok
[root@vvml-yz-hbase-test~]#

(5)确认 zookeeper 系统表

    系统数据库有一个名为 zookeeper 的表,其中包含 ClickHouse Keeper 实例的详细信息:
[root@vvml-yz-hbase-test~]#clickhouse-client --password="123456"
ClickHouse client version 24.1.8.22 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 24.1.8.

vvml-yz-hbase-test.172.18.4.126 :) SELECT *
FROM system.zookeeper
WHERE path IN ('/', '/clickhouse');

SELECT *
FROM system.zookeeper
WHERE path IN ('/', '/clickhouse')

Query id: e713c446-26c5-4c3f-994c-db22eb68b9ad

┌─name───────┬─value─┬─path────────┐
│ keeper     │       │ /           │
│ clickhouse │       │ /           │
│ task_queue │       │ /clickhouse │
│ sessions   │       │ /clickhouse │
└────────────┴───────┴─────────────┘

4 rows in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.126 :) 

2. 配置 ClickHouse 集群

(1)在新主机(node4)上安装 ClickHouse Server 和 ClickHouse Client

rpm -ivh clickhouse-common-static-24.1.8.22.x86_64.rpm
rpm -ivh clickhouse-server-24.1.8.22.x86_64.rpm
rpm -ivh clickhouse-client-24.1.8.22.x86_64.rpm

(2)修改新主机(node4)上的配置文件

# 修改前先备份
cp /etc/clickhouse-server/config.xml /etc/clickhouse-server/config.xml.bak
    修改 /etc/clickhouse-server/config.xml 主配置文件,在根节点 <clickhouse> 下添加以下内容:
<!-- 添加以下条目以允许通过网络接口进行外部通信。 -->
<listen_host>0.0.0.0</listen_host>

<!-- 指定实例所使用的 ClickHouse Keeper -->
<zookeeper>
    <node>
        <host>node1</host>
        <port>9181</port>
    </node>
    <node>
        <host>node2</host>
        <port>9181</port>
    </node>
    <node>
        <host>node3</host>
        <port>9181</port>
    </node>
</zookeeper>
    然后在全部四台主机上执行下面的操作步骤。

(3)更新配置

    修改 /etc/clickhouse-server/config.xml 主配置文件,在根节点 <clickhouse> 下添加以下内容:
    <remote_servers>
        <cluster_2S_2R>
            <shard>
                <replica>
                    <host>node1</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <replica>
                    <host>node2</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
            <shard>
                <replica>
                    <host>node3</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <replica>
                    <host>node4</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
        </cluster_2S_2R>
    </remote_servers>

(4)重启 ClickHouse 并验证集群已创建

sudo service clickhouse-server restart
    查看集群:
[root@vvml-yz-hbase-test~]#clickhouse-client --password="123456"
ClickHouse client version 24.1.8.22 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 24.1.8.

vvml-yz-hbase-test.172.18.4.126 :) SHOW CLUSTERS;

SHOW CLUSTERS

Query id: 80e81978-d1f4-4721-85d8-7e7803230373

┌─cluster───────┐
│ cluster_2S_2R │
└───────────────┘

1 row in set. Elapsed: 0.001 sec. 

vvml-yz-hbase-test.172.18.4.126 :) select cluster,shard_num,replica_num,host_name,is_local,user,database_shard_name,database_replica_name from system.clusters;

SELECT
    cluster,
    shard_num,
    replica_num,
    host_name,
    is_local,
    user,
    database_shard_name,
    database_replica_name
FROM system.clusters

Query id: 4b39d9ec-b4f7-4557-b76d-05f3893f4ef7

┌─cluster───────┬─shard_num─┬─replica_num─┬─host_name─┬─is_local─┬─user────┬─database_shard_name─┬─database_replica_name─┐
│ cluster_2S_2R │         1 │           1 │ node1     │        1 │ default │                     │                       │
│ cluster_2S_2R │         1 │           2 │ node2     │        0 │ default │                     │                       │
│ cluster_2S_2R │         2 │           1 │ node3     │        0 │ default │                     │                       │
│ cluster_2S_2R │         2 │           2 │ node4     │        0 │ default │                     │                       │
└───────────────┴───────────┴─────────────┴───────────┴──────────┴─────────┴─────────────────────┴───────────────────────┘

4 rows in set. Elapsed: 0.001 sec. 

vvml-yz-hbase-test.172.18.4.126 :) 

3. 创建分布式表

(1)创建数据库

    使用 node1 上创建一个数据库。ON CLUSTER子句会自动在所有实例上创建数据库。
vvml-yz-hbase-test.172.18.4.126 :) CREATE DATABASE db1 ON CLUSTER 'cluster_2S_2R';

CREATE DATABASE db1 ON CLUSTER cluster_2S_2R

Query id: 7a8cd789-bcfb-4855-a131-ba7935cffcfb

┌─host──┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ node1 │ 9000 │      0 │       │                   3 │                0 │
│ node3 │ 9000 │      0 │       │                   2 │                0 │
│ node4 │ 9000 │      0 │       │                   1 │                0 │
│ node2 │ 9000 │      0 │       │                   0 │                0 │
└───────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘

4 rows in set. Elapsed: 0.099 sec. 

vvml-yz-hbase-test.172.18.4.126 :)

(2)创建本地表

    在 db1 库中建表,同样,ON CLUSTER 子句会自动在所有实例上建表。
vvml-yz-hbase-test.172.18.4.126 :) CREATE TABLE db1.table1 on cluster 'cluster_2S_2R'
(
    `id` UInt64,
    `column1` String
)
ENGINE = MergeTree
ORDER BY column1;

CREATE TABLE db1.table1 ON CLUSTER cluster_2S_2R
(
    `id` UInt64,
    `column1` String
)
ENGINE = MergeTree
ORDER BY column1

Query id: abb936ad-3618-4821-92f6-cfaa83fb4d51

┌─host──┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ node3 │ 9000 │      0 │       │                   3 │                2 │
│ node1 │ 9000 │      0 │       │                   2 │                2 │
└───────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
┌─host──┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ node4 │ 9000 │      0 │       │                   1 │                0 │
│ node2 │ 9000 │      0 │       │                   0 │                0 │
└───────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘

4 rows in set. Elapsed: 0.158 sec. 

vvml-yz-hbase-test.172.18.4.126 :)

(3)在一个分片实例上(node1)新增两行

vvml-yz-hbase-test.172.18.4.126 :) INSERT INTO db1.table1
    (id, column1)
VALUES
    (1, 'abc'),
    (2, 'def');

INSERT INTO db1.table1 (id, column1) FORMAT Values

Query id: 959da99c-c473-4c3a-9381-fa65b447161c

Ok.

2 rows in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.126 :) 

(4)在另一个分片实例上(node3)新增两行

vvml-yz-hbase-test.172.18.4.71 :) INSERT INTO db1.table1
    (id, column1)
VALUES
    (3, 'ghi'),
    (4, 'jkl');

INSERT INTO db1.table1 (id, column1) FORMAT Values

Query id: c8864197-ec82-4aba-8c2e-aaa2af468553

Ok.

2 rows in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.71 :)

(5)在所有实例上分别执行 SELECT 查询

    node1、node3 上查询结果显示该本地实例上的两行数据:
# node1
vvml-yz-hbase-test.172.18.4.126 :) SELECT *
FROM db1.table1;

SELECT *
FROM db1.table1

Query id: 2f2fc679-9091-41ae-967b-4bd8e2ec7311

┌─id─┬─column1─┐
│  1 │ abc     │
│  2 │ def     │
└────┴─────────┘

2 rows in set. Elapsed: 0.001 sec. 

vvml-yz-hbase-test.172.18.4.126 :) 

# node3
vvml-yz-hbase-test.172.18.4.71 :) SELECT *
FROM db1.table1;

SELECT *
FROM db1.table1

Query id: 18843522-c678-45f0-901d-73e1bbfd4dbf

┌─id─┬─column1─┐
│  3 │ ghi     │
│  4 │ jkl     │
└────┴─────────┘

2 rows in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.71 :) 
    node2、node4 上查不到数据,说明写入本地表并没有复制到其副本。

(6)创建分布式表

    可以创建一个分布式表来表示两个分片上的数据。具有分布式表引擎的表不存储自己的任何数据,而是允许在多个服务器上进行分布式查询处理。读取命中所有分片,写入可以分布在分片之间。在任一实例上创建分布式表:
vvml-yz-hbase-test.172.18.4.126 :) CREATE TABLE db1.dist_table ON CLUSTER 'cluster_2S_2R'
(
    id UInt64,
    column1 String
)
ENGINE = Distributed(cluster_2S_2R,db1,table1);

CREATE TABLE db1.dist_table ON CLUSTER cluster_2S_2R
(
    `id` UInt64,
    `column1` String
)
ENGINE = Distributed(cluster_2S_2R, db1, table1)

Query id: 7c08e756-90cf-4014-9368-dc41fe7d06f4

┌─host──┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ node4 │ 9000 │      0 │       │                   3 │                0 │
│ node2 │ 9000 │      0 │       │                   2 │                0 │
│ node1 │ 9000 │      0 │       │                   1 │                0 │
│ node3 │ 9000 │      0 │       │                   0 │                0 │
└───────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘

4 rows in set. Elapsed: 0.095 sec. 

vvml-yz-hbase-test.172.18.4.126 :)

(7)在所有实例上分别执行 SELECT 查询

    所有实例上都创建了 db1.dist_table 表,但是 SELECT *

FROM db1.dist_table 查询结果却出乎意料。预想在所有实例上查询的结果相同,都返回两个分片中的所有四行数据,而实际四个实例返回的结果是不确定的:node1 有时返回全部4行,有时只返回前两行;node3 有时返回全部4行,有时只返回后两行;node2 有时返回后两行,有时不返回数据;node4 有时返回前两行,有时不返回数据。

    说明:
  • ClickHouse 集群是一种对等架构,在一个集群里每个 ClickHouse 实例都是独立的,即使是同一个分片内的不同副本实例间,也是没有主从概念。
  • 在集群中任何一个实例上执行的 DDL 语句中使用 ON CLUSTER 子句,会自动在集群中的所有实例上执行。
  • 分布式表实际上是一种视图,映射到 ClickHouse 集群实例上的本地表。从分布式表中执行 SELECT 查询会使用集群所有相关分片的资源。
  • 底层本地表如果使用 MergeTree 表引擎,在分布式表上执行同一查询,返回结果有可能不确定。

4. 数据自动分片

    首先想到的是如果在分布式表上能够执行 DML 语句,就应该可以达到数据自动分片的效果。
vvml-yz-hbase-test.172.18.4.126 :) insert into db1.dist_table
    (id, column1)
values
    (5, 'mno'),
    (6, 'pqr');

INSERT INTO db1.dist_table (id, column1) FORMAT Values

Query id: 78b4cc88-9b13-4078-a3f0-d405f4338cfb

Elapsed: 0.002 sec. 

Received exception from server (version 24.1.8):
Code: 55. DB::Exception: Received from localhost:9000. DB::Exception: Method write is not supported by storage Distributed with more than one shard and no sharding key provided. (STORAGE_REQUIRES_PARAMETER)

vvml-yz-hbase-test.172.18.4.126 :)
    报错明确指出,当数据存储分布在多于一个的分片上,并且没有提供分片键时,分布式表不支持写入。为了进一步演示,下面使用和创建 hits_v1 表类似的 CREATE TABLE 语句创建一个新的本地表,有三点不同:
  • 库名不同。
  • 表名不同。
  • 使用 ON CLUSTER 子句。

(1)创建带有分片键的本地表

    在任一实例上执行下面的建表语句:
CREATE TABLE db1.hits_local ON CLUSTER 'cluster_2S_2R'
(
    `WatchID` UInt64,
    `JavaEnable` UInt8,
    `Title` String,
    `GoodEvent` Int16,
    `EventTime` DateTime,
    `EventDate` Date,
    `CounterID` UInt32,
    `ClientIP` UInt32,
    `ClientIP6` FixedString(16),
    `RegionID` UInt32,
    `UserID` UInt64,
    `CounterClass` Int8,
    `OS` UInt8,
    `UserAgent` UInt8,
    `URL` String,
    `Referer` String,
    `URLDomain` String,
    `RefererDomain` String,
    `Refresh` UInt8,
    `IsRobot` UInt8,
    `RefererCategories` Array(UInt16),
    `URLCategories` Array(UInt16),
    `URLRegions` Array(UInt32),
    `RefererRegions` Array(UInt32),
    `ResolutionWidth` UInt16,
    `ResolutionHeight` UInt16,
    `ResolutionDepth` UInt8,
    `FlashMajor` UInt8,
    `FlashMinor` UInt8,
    `FlashMinor2` String,
    `NetMajor` UInt8,
    `NetMinor` UInt8,
    `UserAgentMajor` UInt16,
    `UserAgentMinor` FixedString(2),
    `CookieEnable` UInt8,
    `JavascriptEnable` UInt8,
    `IsMobile` UInt8,
    `MobilePhone` UInt8,
    `MobilePhoneModel` String,
    `Params` String,
    `IPNetworkID` UInt32,
    `TraficSourceID` Int8,
    `SearchEngineID` UInt16,
    `SearchPhrase` String,
    `AdvEngineID` UInt8,
    `IsArtifical` UInt8,
    `WindowClientWidth` UInt16,
    `WindowClientHeight` UInt16,
    `ClientTimeZone` Int16,
    `ClientEventTime` DateTime,
    `SilverlightVersion1` UInt8,
    `SilverlightVersion2` UInt8,
    `SilverlightVersion3` UInt32,
    `SilverlightVersion4` UInt16,
    `PageCharset` String,
    `CodeVersion` UInt32,
    `IsLink` UInt8,
    `IsDownload` UInt8,
    `IsNotBounce` UInt8,
    `FUniqID` UInt64,
    `HID` UInt32,
    `IsOldCounter` UInt8,
    `IsEvent` UInt8,
    `IsParameter` UInt8,
    `DontCountHits` UInt8,
    `WithHash` UInt8,
    `HitColor` FixedString(1),
    `UTCEventTime` DateTime,
    `Age` UInt8,
    `Sex` UInt8,
    `Income` UInt8,
    `Interests` UInt16,
    `Robotness` UInt8,
    `GeneralInterests` Array(UInt16),
    `RemoteIP` UInt32,
    `RemoteIP6` FixedString(16),
    `WindowName` Int32,
    `OpenerName` Int32,
    `HistoryLength` Int16,
    `BrowserLanguage` FixedString(2),
    `BrowserCountry` FixedString(2),
    `SocialNetwork` String,
    `SocialAction` String,
    `HTTPError` UInt16,
    `SendTiming` Int32,
    `DNSTiming` Int32,
    `ConnectTiming` Int32,
    `ResponseStartTiming` Int32,
    `ResponseEndTiming` Int32,
    `FetchTiming` Int32,
    `RedirectTiming` Int32,
    `DOMInteractiveTiming` Int32,
    `DOMContentLoadedTiming` Int32,
    `DOMCompleteTiming` Int32,
    `LoadEventStartTiming` Int32,
    `LoadEventEndTiming` Int32,
    `NSToDOMContentLoadedTiming` Int32,
    `FirstPaintTiming` Int32,
    `RedirectCount` Int8,
    `SocialSourceNetworkID` UInt8,
    `SocialSourcePage` String,
    `ParamPrice` Int64,
    `ParamOrderID` String,
    `ParamCurrency` FixedString(3),
    `ParamCurrencyID` UInt16,
    `GoalsReached` Array(UInt32),
    `OpenstatServiceName` String,
    `OpenstatCampaignID` String,
    `OpenstatAdID` String,
    `OpenstatSourceID` String,
    `UTMSource` String,
    `UTMMedium` String,
    `UTMCampaign` String,
    `UTMContent` String,
    `UTMTerm` String,
    `FromTag` String,
    `HasGCLID` UInt8,
    `RefererHash` UInt64,
    `URLHash` UInt64,
    `CLID` UInt32,
    `YCLID` UInt64,
    `ShareService` String,
    `ShareURL` String,
    `ShareTitle` String,
    `ParsedParams` Nested(
        Key1 String,
        Key2 String,
        Key3 String,
        Key4 String,
        Key5 String,
        ValueDouble Float64),
    `IslandID` FixedString(16),
    `RequestNum` UInt32,
    `RequestTry` UInt8
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

(2)创建提供集群本地表视图的分布式表

    在任一实例上执行下面的建表语句:
CREATE TABLE db1.hits_all ON CLUSTER 'cluster_2S_2R'
AS db1.hits_local
ENGINE = Distributed(cluster_2S_2R, db1, hits_local, rand());

(3)向分布式表中插入数据

    在任一实例上执行下面的 insert 语句:
INSERT INTO db1.hits_all SELECT * FROM tutorial.hits_v1;

(4)在所有实例上分别执行 SELECT 查询

# node1
vvml-yz-hbase-test.172.18.4.126 :) select count(*) from db1.hits_local;

SELECT count(*)
FROM db1.hits_local

Query id: 10a10594-dad1-4693-96fa-a27e62256cc0

┌─count()─┐
│ 4437894 │
└─────────┘

1 row in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.126 :) select count(*) from db1.hits_all;

SELECT count(*)
FROM db1.hits_all

Query id: 4d825459-19fa-4f68-9326-cf6a9b7e113a

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.006 sec. 

vvml-yz-hbase-test.172.18.4.126 :) 

# node2
vvml-yz-hbase-test.172.18.4.188 :) select count(*) from db1.hits_local;

SELECT count(*)
FROM db1.hits_local

Query id: 7da679f4-4f0f-42e5-8c78-df6c9814c61f

┌─count()─┐
│ 4437894 │
└─────────┘

1 row in set. Elapsed: 0.003 sec. 

vvml-yz-hbase-test.172.18.4.188 :) select count(*) from db1.hits_all;

SELECT count(*)
FROM db1.hits_all

Query id: 29594d92-e38e-473a-bc91-7a5fb37e0647

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.005 sec. 

vvml-yz-hbase-test.172.18.4.188 :) 

# node3
vvml-yz-hbase-test.172.18.4.71 :) select count(*) from db1.hits_local;

SELECT count(*)
FROM db1.hits_local

Query id: 2e5a0fca-9b13-4c4e-8cb9-f9ee60da6152

┌─count()─┐
│ 4436004 │
└─────────┘

1 row in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.71 :) select count(*) from db1.hits_all;

SELECT count(*)
FROM db1.hits_all

Query id: 85e6e7a4-eaab-4455-8c29-52ec0998a9b0

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.006 sec. 

vvml-yz-hbase-test.172.18.4.71 :) 

# node4
vvml-yz-hbase-test.172.18.4.86 :) select count(*) from db1.hits_local;

SELECT count(*)
FROM db1.hits_local

Query id: 9950210e-a4a4-4e1d-a449-4aefb2ea396f

┌─count()─┐
│ 4436004 │
└─────────┘

1 row in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.86 :) select count(*) from db1.hits_all;

SELECT count(*)
FROM db1.hits_all

Query id: 45777141-ff08-452f-93b8-1650265d4175

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.009 sec. 

vvml-yz-hbase-test.172.18.4.86 :)
    从查询结果可以看到,本地表 node1、node2 上的查询结果相同,node3、node4 上的查询结果相同,分布式表四个节点查询结果都相同。node1 和 node3 分属于集群中的不同分片,而在创建分布式表时指定的分布规则随机,所以这两个实例上的本地表数据量存在少许差异是符合预期的。结论是:
  • 指定分片键的分布式表可以写数据。
  • 数据按创建分布式表时指定的自定义的分片规则分布。
  • select count() from tutorial.hits_v1 用了2毫秒,select count() from db1.hits_all 用了6毫秒,在本测试环境中,查询分布式表比查询本地表慢了两倍。
  • 底层表如果使用 MergeTree 表引擎,其上正常定义了分布规则的分布式表,插入分布式表可以自动在同一分片上的不同副本之间做数据同步。

5. 使用 ReplicatedMergeTree 表引擎复制数据

    按通常的理解,一个分布式数据库至少需要满足以下基本需求:
  • 数据按指定规则自动分片

  • 同一分片的多个副本数据保持一致

  • 数据分片和多副本间的数据同步对应用透明。

      ClickHouse 推荐的配置是使用 ReplicatedMergeTree 表引擎,自动完成副本间的数据复制。创建 ReplcatedMergeTree 表,通常需要设置宏来识别每个用于创建表的分片和副本。
    

(1)定义宏

    在全部四台主机上执行下面的操作步骤。

    修改 /etc/clickhouse-server/config.xml 主配置文件,在每个节点 <shard> 下添加 <internal_replication>true</internal_replication>:
    <remote_servers>
        <cluster_2S_2R>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>node1</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <replica>
                    <host>node2</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>node3</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
                <replica>
                    <host>node4</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>123456</password>
                </replica>
            </shard>
        </cluster_2S_2R>
    </remote_servers>
    在节点 </remote_servers> 后面添加以下内容:
<macros>
    <shard>01</shard>
    <replica>01</replica>
</macros>
    node1 配置成 01、01;node2 配置成 01、02;node3 配置成 02、01;node4 配置成 02、02。

    然后重启所有实例:
sudo service clickhouse-server restart

(2)创建本地表

    在任一实例上执行下面的建表语句:
CREATE TABLE db1.hits_replica ON CLUSTER 'cluster_2S_2R'
(
    `WatchID` UInt64,
    `JavaEnable` UInt8,
    `Title` String,
    `GoodEvent` Int16,
    `EventTime` DateTime,
    `EventDate` Date,
    `CounterID` UInt32,
    `ClientIP` UInt32,
    `ClientIP6` FixedString(16),
    `RegionID` UInt32,
    `UserID` UInt64,
    `CounterClass` Int8,
    `OS` UInt8,
    `UserAgent` UInt8,
    `URL` String,
    `Referer` String,
    `URLDomain` String,
    `RefererDomain` String,
    `Refresh` UInt8,
    `IsRobot` UInt8,
    `RefererCategories` Array(UInt16),
    `URLCategories` Array(UInt16),
    `URLRegions` Array(UInt32),
    `RefererRegions` Array(UInt32),
    `ResolutionWidth` UInt16,
    `ResolutionHeight` UInt16,
    `ResolutionDepth` UInt8,
    `FlashMajor` UInt8,
    `FlashMinor` UInt8,
    `FlashMinor2` String,
    `NetMajor` UInt8,
    `NetMinor` UInt8,
    `UserAgentMajor` UInt16,
    `UserAgentMinor` FixedString(2),
    `CookieEnable` UInt8,
    `JavascriptEnable` UInt8,
    `IsMobile` UInt8,
    `MobilePhone` UInt8,
    `MobilePhoneModel` String,
    `Params` String,
    `IPNetworkID` UInt32,
    `TraficSourceID` Int8,
    `SearchEngineID` UInt16,
    `SearchPhrase` String,
    `AdvEngineID` UInt8,
    `IsArtifical` UInt8,
    `WindowClientWidth` UInt16,
    `WindowClientHeight` UInt16,
    `ClientTimeZone` Int16,
    `ClientEventTime` DateTime,
    `SilverlightVersion1` UInt8,
    `SilverlightVersion2` UInt8,
    `SilverlightVersion3` UInt32,
    `SilverlightVersion4` UInt16,
    `PageCharset` String,
    `CodeVersion` UInt32,
    `IsLink` UInt8,
    `IsDownload` UInt8,
    `IsNotBounce` UInt8,
    `FUniqID` UInt64,
    `HID` UInt32,
    `IsOldCounter` UInt8,
    `IsEvent` UInt8,
    `IsParameter` UInt8,
    `DontCountHits` UInt8,
    `WithHash` UInt8,
    `HitColor` FixedString(1),
    `UTCEventTime` DateTime,
    `Age` UInt8,
    `Sex` UInt8,
    `Income` UInt8,
    `Interests` UInt16,
    `Robotness` UInt8,
    `GeneralInterests` Array(UInt16),
    `RemoteIP` UInt32,
    `RemoteIP6` FixedString(16),
    `WindowName` Int32,
    `OpenerName` Int32,
    `HistoryLength` Int16,
    `BrowserLanguage` FixedString(2),
    `BrowserCountry` FixedString(2),
    `SocialNetwork` String,
    `SocialAction` String,
    `HTTPError` UInt16,
    `SendTiming` Int32,
    `DNSTiming` Int32,
    `ConnectTiming` Int32,
    `ResponseStartTiming` Int32,
    `ResponseEndTiming` Int32,
    `FetchTiming` Int32,
    `RedirectTiming` Int32,
    `DOMInteractiveTiming` Int32,
    `DOMContentLoadedTiming` Int32,
    `DOMCompleteTiming` Int32,
    `LoadEventStartTiming` Int32,
    `LoadEventEndTiming` Int32,
    `NSToDOMContentLoadedTiming` Int32,
    `FirstPaintTiming` Int32,
    `RedirectCount` Int8,
    `SocialSourceNetworkID` UInt8,
    `SocialSourcePage` String,
    `ParamPrice` Int64,
    `ParamOrderID` String,
    `ParamCurrency` FixedString(3),
    `ParamCurrencyID` UInt16,
    `GoalsReached` Array(UInt32),
    `OpenstatServiceName` String,
    `OpenstatCampaignID` String,
    `OpenstatAdID` String,
    `OpenstatSourceID` String,
    `UTMSource` String,
    `UTMMedium` String,
    `UTMCampaign` String,
    `UTMContent` String,
    `UTMTerm` String,
    `FromTag` String,
    `HasGCLID` UInt8,
    `RefererHash` UInt64,
    `URLHash` UInt64,
    `CLID` UInt32,
    `YCLID` UInt64,
    `ShareService` String,
    `ShareURL` String,
    `ShareTitle` String,
    `ParsedParams` Nested(
        Key1 String,
        Key2 String,
        Key3 String,
        Key4 String,
        Key5 String,
        ValueDouble Float64),
    `IslandID` FixedString(16),
    `RequestNum` UInt32,
    `RequestTry` UInt8
)
ENGINE = ReplicatedMergeTree(
    '/clickhouse/tables/{shard}/hits',
    '{replica}'
)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

(3)创建分布式表

    在任一实例上执行下面的建表语句:
CREATE TABLE db1.hits_replica_all ON CLUSTER 'cluster_2S_2R'
AS db1.hits_replica
ENGINE = Distributed(cluster_2S_2R, db1, hits_replica, rand());

(4)向分布式表中插入数据

    在任一实例上执行下面的 insert 语句:
INSERT INTO db1.hits_replica_all SELECT * FROM tutorial.hits_v1;

(5)在所有实例上分别执行 SELECT 查询

# node1
vvml-yz-hbase-test.172.18.4.126 :) select count(*) from db1.hits_replica;

SELECT count(*)
FROM db1.hits_replica

Query id: e4f08a9c-39be-48cf-a8f9-6caaa98b9fed

┌─count()─┐
│ 4438089 │
└─────────┘

1 row in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.126 :) select count(*) from db1.hits_replica_all;

SELECT count(*)
FROM db1.hits_replica_all

Query id: 765f0ea5-a199-4bfb-85c4-6fd31e23f1af

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.007 sec. 

vvml-yz-hbase-test.172.18.4.126 :) 

# node2
vvml-yz-hbase-test.172.18.4.188 :) select count(*) from db1.hits_replica;

SELECT count(*)
FROM db1.hits_replica

Query id: b464df00-4c1a-4fea-8f2f-70fd0c32569f

┌─count()─┐
│ 4438089 │
└─────────┘

1 row in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.188 :) select count(*) from db1.hits_replica_all;

SELECT count(*)
FROM db1.hits_replica_all

Query id: 19a77125-7e7e-4654-bb90-8884c3b0ed26

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.007 sec. 

vvml-yz-hbase-test.172.18.4.188 :) 

# node3
vvml-yz-hbase-test.172.18.4.71 :) select count(*) from db1.hits_replica;

SELECT count(*)
FROM db1.hits_replica

Query id: e021f13c-b980-4297-9c26-b94465503101

┌─count()─┐
│ 4435809 │
└─────────┘

1 row in set. Elapsed: 0.003 sec. 

vvml-yz-hbase-test.172.18.4.71 :) select count(*) from db1.hits_replica_all;

SELECT count(*)
FROM db1.hits_replica_all

Query id: 80ebbda2-be03-445c-8c16-f67428a45572

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.005 sec. 

vvml-yz-hbase-test.172.18.4.71 :) 

# node4
vvml-yz-hbase-test.172.18.4.86 :) select count(*) from db1.hits_replica;

SELECT count(*)
FROM db1.hits_replica

Query id: 204f483d-3fcb-4758-8df4-20e79f78e42e

┌─count()─┐
│ 4435809 │
└─────────┘

1 row in set. Elapsed: 0.002 sec. 

vvml-yz-hbase-test.172.18.4.86 :) select count(*) from db1.hits_replica_all;

SELECT count(*)
FROM db1.hits_replica_all

Query id: e3d28b81-50b7-46e6-a855-1b40ef9fe42a

┌─count()─┐
│ 8873898 │
└─────────┘

1 row in set. Elapsed: 0.007 sec. 

vvml-yz-hbase-test.172.18.4.86 :)
    从查询结果可以看到,本地表 node1、node2 上的查询结果相同,node3、node4 上的查询结果相同,分布式表四个节点查询结果都相同。node1 和 node3 分属于集群中的不同分片,而在创建分布式表时指定的分布规则随机,所以这两个实例上的本地表数据量存在少许差异是符合预期的。ReplicatedMergeTree 表可以自动在同一分片的不同副本间同步数据。

参考:

  • 使用教程
  • ClickHouse Keeper (clickhouse-keeper)

本文转载自: https://blog.csdn.net/wzy0623/article/details/137809306
版权归原作者 wzy0623 所有, 如有侵权,请联系我们删除。

“ClickHouse 集群部署(不需要 Zookeeper)”的评论:

还没有评论