

[Native Hadoop Distributed Cluster Setup]

Distributed Big Data Environment Setup Manual

  1. Operating system: CentOS 7, 3 servers, 16 GB RAM each
  2. Software list (name and version):
     1. jdk1.8.0_333
     2. zookeeper-3.4.10
     3. mysql-5.7.36
     4. hadoop-3.2.3
     5. flume-1.8.0
     6. kafka_2.12-1.1.0
     7. hbase-2.4.11
     8. Scala 2.12.1
     9. spark-3.0.2
     10. hive-3.1.2
     11. Azkaban
     12. zeppelin-0.10.1

3. Install the JDK (all 3 nodes)

Disable the firewall
firewall-cmd --state
systemctl stop firewalld.service
systemctl disable firewalld.service
Install the JDK online via yum
yum -y list java*
yum -y install java-1.8.0-openjdk java-1.8.0-openjdk-devel
java -version

Configure environment variables
cd /usr/lib/jvm
vi /etc/profile

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.302.b08-0.el7_9.x86_64
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin

source /etc/profile

4. Modify the hostname and hosts file (all 3 nodes)

4.1 Modify the hostname (master node: master; workers: slave1, slave2)
vi /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=master
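On CentOS 7, the hostname can also be set persistently with hostnamectl; a minimal optional alternative (run the matching command on each node):
hostnamectl set-hostname master    # on the master node
hostnamectl set-hostname slave1    # on the first worker
hostnamectl set-hostname slave2    # on the second worker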
4.2 Modify the hosts file
vi /etc/hosts
Add the IP and hostname of each of the three servers:
192.168.111.75 master
192.168.111.76 slave1
192.168.111.77 slave2
:wq
Reboot for the change to take effect
reboot

5. Passwordless SSH between the three machines

## Generate a key pair on each of the three hosts

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
## Host key checking: append at the end of the file
vim /etc/ssh/ssh_config
## Add the following (align it with the existing entries)
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
## Copy the public key to this host and to the other two VMs; you will be prompted for each VM's password
ssh-copy-id -i ~/.ssh/id_rsa.pub root@master
ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave1
ssh-copy-id -i ~/.ssh/id_rsa.pub root@slave2
Test passwordless login between all three machines
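A quick way to verify the setup is a loop over all three hosts; a minimal sketch (run it from each machine; it should print the three hostnames without asking for a password):
for h in master slave1 slave2; do
  ssh root@$h hostname    # should return immediately with no password prompt
done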

6. ZooKeeper distributed installation (start on the master)

On the master node:
cd /opt
Unpack the archive
tar -zxvf
cd into the ZooKeeper root directory
Create two directories:
zkData logs
Create a myid file under zkData:
echo 1 > ./myid
cd into ZooKeeper's conf directory
cp zoo_sample.cfg zoo.cfg
vi ./zoo.cfg
Comment out the existing dataDir= line
and add at the end:
dataDir=/opt/zookeeper-3.4.10/zkData
dataLogDir=/opt/zookeeper-3.4.10/logs
server.1=192.168.111.75:2888:3888
server.2=192.168.111.76:2888:3888
server.3=192.168.111.77:2888:3888

Copy the directory to the other two nodes
scp -r ./zookeeper-3.4.10 root@slave1:/opt/
scp -r ./zookeeper-3.4.10 root@slave2:/opt/

Change myid to 2 on slave1 and to 3 on slave2.
Configure environment variables on all three nodes
vi /etc/profile
export ZOOKEEPER_HOME=/opt/zookeeper-3.4.10
export PATH=$PATH:$ZOOKEEPER_HOME/bin

source /etc/profile
Start the servers one at a time (ZooKeeper holds a leader election)
zkServer.sh start
zkServer.sh stop
zkServer.sh restart
zkServer.sh status   (check the role/status)
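After all three servers are started, one node should report itself as leader and the other two as follower; a minimal check from any node, assuming the same installation path on every host:
for h in master slave1 slave2; do
  echo "== $h =="
  ssh root@$h /opt/zookeeper-3.4.10/bin/zkServer.sh status    # prints Mode: leader or Mode: follower
done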

7. MySQL single-node / master-standby installation

MySQL packages: https://downloads.mysql.com/archives/community/
1. Check for a preinstalled MariaDB and force-remove it
rpm -qa | grep mariadb
rpm -e --nodeps mariadb-libs-5.5.52-1.el7.x86_64
Add the mysql group and mysql user
groupadd mysql
useradd -r -g mysql mysql
2. Unpack the MySQL archive into /usr/local
cd /usr/local
tar -zxvf mysql-5.7.36-linux-glibc2.12-x86_64.tar.gz
3. Rename the unpacked directory to mysql
mv mysql-5.7.36-linux-glibc2.12-x86_64 mysql

Change ownership
chown -R mysql:mysql mysql/
Create the configuration file: vim /etc/my.cnf
[client]
# default character set for the mysql client
default-character-set = utf8mb4
# without this you may get: ERROR 2002 (HY000): Can't connect to local MySQL server through socket
socket=/data/mysql57/data/mysql.sock

[mysqld]
# port
port=3306
character-set-server = utf8mb4
# MySQL installation directory
basedir=/usr/local/mysql
# MySQL data directory
datadir=/data/mysql57/data
socket=/data/mysql57/data/mysql.sock
# disable hostname resolution
skip-name-resolve
# default storage engine for new tables
default-storage-engine=INNODB
lower_case_table_names=1
# too small a value can make large writes (imports) fail
max_allowed_packet = 256M
group_concat_max_len = 10240
# maximum number of connections
max_connections=200
# improves join performance
join_buffer_size = 16M
# redo log file size
innodb_log_file_size = 256M
# log buffer size
innodb_log_buffer_size = 4M
# in-memory buffering for transactions
innodb_log_buffer_size = 3M

[mysqldump]
# fast dumps
quick
default-character-set = utf8mb4
max_allowed_packet = 256M
Create the data directory and change its ownership
mkdir -p /data/mysql57/data

chown -R mysql:mysql /data/mysql57/data
# Initialize the database and note the password printed on the last line (needed for the first login)

bin/mysqld --initialize --user=mysql --basedir=/usr/local/mysql --datadir=/data/mysql57/data
If this step fails because libaio is missing, install it and run the command again:

yum install -y libaio

Start the database
/usr/local/mysql/support-files/mysql.server start
Configure MySQL to start automatically
cp /usr/local/mysql/support-files/mysql.server /etc/rc.d/init.d/mysqld

Make the mysqld service control script executable

chmod +x /etc/rc.d/init.d/mysqld
# Register mysqld as a system service and check that it took effect
chkconfig --add mysqld
chkconfig --list mysqld
# Start the service
service mysqld start
# The service can be started/stopped/restarted with
service mysqld start/stop/restart

Create a symlink, because the system looks for commands under /usr/bin by default.

ln -s /usr/local/mysql/bin/mysql /usr/bin
# Log in as root with the generated random password
mysql -uroot -p

You can now test the connection with a client tool.
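If no GUI client is at hand, the connection can also be checked from another machine with the mysql command-line client; a sketch, assuming remote access for root has been enabled (as shown in the master-standby section below) and using this guide's example address:
mysql -h 192.168.111.75 -P 3306 -uroot -p -e "SELECT VERSION();"    # should print 5.7.36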

MySQL master-standby setup

Prepare two servers.
Install MySQL on the master node.
Check the firewall status
systemctl status firewalld

cd /opt
tar -zxvf ./<mysql package>
mv ./mysql-***** ./mysql
Create the user and data directory
useradd mysql
mkdir -p /data/mysql
chown -R mysql:mysql /data/mysql
Configure my.cnf
vi /etc/my.cnf
[client]
port = 3306
default-character-set=utf8mb4
socket = /data/mysql/data/mysql.sock

[mysqld]
datadir = /data/mysql/data
basedir = /opt/mysql
tmpdir = /tmp
socket = /data/mysql/data/mysql.sock
pid-file = /data/mysql/data/mysql.pid
skip-external-locking = 1
skip-name-resolve = 1
port = 3306
server_id = 833306

default-storage-engine = InnoDB
character-set-server = utf8mb4
default_password_lifetime=0

# log settings
log_timestamps=system
log_bin = /data/mysql/log/mysql-bin
log_bin_index = /data/mysql/log/mysql-bin.index
binlog_format = row
relay_log_recovery=ON
relay_log=/data/mysql/log/mysql-relay-bin
relay_log_index=/data/mysql/log/mysql-relay-bin.index
log_error = /data/mysql/log/mysql-error.log

# replication
replicate_wild_ignore_table = information_schema.%,performance_schema.%,sys.% # schemas created later can be added here; restart the database for the change to take effect

# semi-sync replication settings
plugin_dir=/usr/local/mysql/lib/plugin
plugin_load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
loose_rpl_semi_sync_master_enabled = 1
loose_rpl_semi_sync_slave_enabled = 1
loose_rpl_semi_sync_master_timeout = 5000
Create the log directory
cd /data/mysql
mkdir ./log
chown -R mysql:mysql /data/mysql/log

Go to the mysql bin directory and initialize MySQL
./mysqld --defaults-file=/etc/my.cnf --basedir=/usr/local/mysql/ --datadir=/data/mysql/data --user=mysql --initialize

Check the temporary password
cat /data/mysql/log/mysql-error.log

cp /opt/mysql/support-files/mysql.server /etc/init.d/mysql
service mysql start

Log in to MySQL
/opt/mysql/bin/mysql -u root -p

Run:
set password = password('17696102035');
alter user 'root'@'localhost' password expire never;
flush privileges;
use mysql;
update user set host ='%' where user = 'root';
flush privileges;
exit

Install MySQL on the standby node; the differences in my.cnf are shown below.
vi /etc/my.cnf
[client]
port = 3306
default-character-set=utf8mb4
socket = /data/mysql/data/mysql.sock

[mysqld]
datadir = /data/mysql/data
basedir = /opt/mysql
tmpdir = /tmp
socket = /data/mysql/data/mysql.sock
pid-file = /data/mysql/data/mysql.pid
skip-external-locking = 1
skip-name-resolve = 1
port = 3306
server_id = 723306 # the server id must differ from the master's
read_only=1 # make the standby read-only

default-storage-engine = InnoDB
character-set-server = utf8mb4
default_password_lifetime=0

# log settings

log_timestamps=system
log_bin = /data/mysql/log/mysql-bin
log_bin_index = /data/mysql/log/mysql-bin.index
binlog_format = row
relay_log_recovery=ON
relay_log=/data/mysql/log/mysql-relay-bin
relay_log_index=/data/mysql/log/mysql-relay-bin.index
log_error = /data/mysql/log/mysql-error.log

# replication
replicate_wild_ignore_table = information_schema.%,performance_schema.%,sys.%

# semi-sync replication settings
plugin_dir=/usr/local/mysql/lib/plugin
plugin_load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
loose_rpl_semi_sync_master_enabled = 1
loose_rpl_semi_sync_slave_enabled = 1
loose_rpl_semi_sync_master_timeout = 5000

On the master node, create the replication user
In the mysql client, run:

CREATE USER 'mysync'@'%' IDENTIFIED BY 'mysync';
GRANT REPLICATION SLAVE ON *.* TO 'mysync'@'%' IDENTIFIED BY 'mysync';
FLUSH PRIVILEGES;
show master status;

Configure the standby node
In the mysql client, run (use the File and Position values reported by show master status on the master):

change master to
  master_host='192.168.4.205',
  master_user='mysync',
  master_password='mysync',
  master_log_file='mysql-bin.000002',
  master_log_pos=2276;

start slave;
show slave status\G

Test
Create a database on the master node and check that it appears on the standby node.
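For example, a minimal check using this guide's paths and the root password set above:
# on the master: create a throwaway database
/opt/mysql/bin/mysql -uroot -p -e "CREATE DATABASE repl_test;"
# on the standby: the database should appear and both replication threads should be running
/opt/mysql/bin/mysql -uroot -p -e "SHOW DATABASES; SHOW SLAVE STATUS\G" | grep -E "repl_test|Slave_IO_Running|Slave_SQL_Running"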

8. Install Hadoop (distributed)

Place the NameNode on the first node, the YARN ResourceManager on the second, and the SecondaryNameNode on the third.

  1. In Hadoop's ./etc/hadoop directory, set JAVA_HOME in hadoop-env.sh, yarn-env.sh, and mapred-env.sh:
     export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.332.b09-1.el7_9.x86_64
  2. Modify the cluster configuration files, mainly core-site.xml, hdfs-site.xml, yarn-site.xml, mapred-site.xml, and workers.

core-site.xml
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://192.168.111.75:8020</value>
</property>

<!-- Hadoop data storage directory -->
<property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop-3.2.3/data</value>
</property>

<!-- static user for the HDFS web UI: root -->
<property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
</property>

<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>

hdfs-site.xml

<property>
    <name>dfs.namenode.http-address</name>
    <value>192.168.111.75:50070</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>192.168.111.77:9868</value>
</property>

yarn-site.xml

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>

<!-- ResourceManager address -->
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>192.168.111.76</value>
</property>

<!-- environment variables inherited by containers -->
<property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>

<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
</property>
<property>
    <name>yarn.log.server.url</name>
    <value>http://192.168.111.75:19888/jobhistory/logs</value>
</property>
<property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
</property>

mapred-site.xml

<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>192.168.111.75:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>192.168.111.75:19888</value>
</property>

workers

192.168.111.75
192.168.111.76
192.168.111.77

Copy the Hadoop directory to the other two nodes:
scp -r ./hadoop…… root@slave1:/opt/
scp -r ./hadoop…… root@slave2:/opt/

On the master node, format the NameNode
hdfs namenode -format

Start HDFS on the master node
start-dfs.sh
Start YARN on the second node
start-yarn.sh
Start the history server on the master node
mapred --daemon start historyserver

Check the web UIs for HDFS, YARN, and the job history server
http://192.168.111.75:50070
http://192.168.111.76:8088
http://192.168.111.75:19888/jobhistory
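A quick smoke test for HDFS and YARN; a sketch that assumes the standard layout of the hadoop-3.2.3 distribution for the examples jar:
hdfs dfsadmin -report    # all three DataNodes should be listed as live
hdfs dfs -mkdir -p /tmp/smoke && hdfs dfs -put /etc/hosts /tmp/smoke/    # simple write into HDFS
hadoop jar /opt/hadoop-3.2.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.3.jar pi 2 10    # runs a small MapReduce job on YARN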

9. Flume single-node installation

Unpack the archive
tar -zxvf flume********.tar.gz
Rename the directory
mv ./**** ./****
Go to the conf directory
cp flume-env.sh.template flume-env.sh
vi flume-env.sh
Set
export JAVA_HOME=/opt/soft/jdk180
Configure environment variables (vi /etc/profile)

export FLUME_HOME=/opt/soft/flume160 // your Flume path
export PATH=$PATH:$FLUME_HOME/bin

source /etc/profile
To verify the installation, go to the Flume directory and run
./bin/flume-ng version
It should print the version.
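To test more than the version string, a minimal netcat-to-logger agent can be run; a sketch, with the config file name chosen here for illustration and $FLUME_HOME taken from the profile above:
cat > $FLUME_HOME/conf/netcat-logger.conf <<'EOF'
# one netcat source -> memory channel -> logger sink
a1.sources = r1
a1.channels = c1
a1.sinks = k1
a1.sources.r1.type = netcat
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 44444
a1.channels.c1.type = memory
a1.sinks.k1.type = logger
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
EOF
flume-ng agent --conf $FLUME_HOME/conf --conf-file $FLUME_HOME/conf/netcat-logger.conf --name a1 -Dflume.root.logger=INFO,console
# in another terminal: echo hello | nc localhost 44444   (the event should appear in the agent's console output)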

10. Kafka message broker installation

Unpack the archive (on the master node)
tar -zxvf
cd /opt/kafka_2.12-1.1.0/config/
cp server.properties server.properties_default

cd /opt/kafka_2.12-1.1.0
mkdir ./data

cd /opt/kafka_2.12-1.1.0/config/
vi ./server.properties

broker.id=1
log.dirs=/opt/kafka_2.12-1.1.0/data
listeners=PLAINTEXT://192.168.111.75:9092
zookeeper.connect=192.168.111.75:2181,192.168.111.76:2181,192.168.111.77:2181

Distribute to the worker nodes
scp -r ./kafka_2.12-1.1.0 root@slave1:/opt/
scp -r ./kafka_2.12-1.1.0 root@slave2:/opt/

Modify server.properties on slave1 and slave2:
listeners=PLAINTEXT://192.168.111.76:9092
broker.id=2

listeners=PLAINTEXT://192.168.111.77:9092
broker.id=3
Configure environment variables on all three nodes
vi /etc/profile
Add:
export KAFKA_HOME=/opt/kafka_2.12-1.1.0
export PATH=$PATH:$KAFKA_HOME/bin

source /etc/profile

Start Kafka on all three nodes:
/opt/kafka_2.12-1.1.0/bin/kafka-server-start.sh -daemon /opt/kafka_2.12-1.1.0/config/server.properties
Stop Kafka on all three nodes:
/opt/kafka_2.12-1.1.0/bin/kafka-server-stop.sh
Test
kafka-topics.sh --list --zookeeper 192.168.111.75:2181,192.168.111.76:2181,192.168.111.77:2181
kafka-topics.sh --create --zookeeper 192.168.111.75:2181,192.168.111.76:2181,192.168.111.77:2181 --replication-factor 1 --partitions 1 --topic test_topic
/opt/kafka_2.12-1.1.0/bin/kafka-console-producer.sh --broker-list 192.168.111.76:9092 --topic test_topic
kafka-console-consumer.sh --zookeeper 192.168.111.75:2181,192.168.111.76:2181,192.168.111.77:2181 --topic test_topic --from-beginning
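To confirm partition leadership and replica placement, the topic can also be described; a small additional check using the same ZooKeeper quorum:
kafka-topics.sh --describe --zookeeper 192.168.111.75:2181,192.168.111.76:2181,192.168.111.77:2181 --topic test_topic    # shows leader, replicas and ISR per partition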

11. HBase distributed installation

On the master node:
Unpack the archive
tar -zxvf
Edit regionservers under hbase/conf
and add
192.168.111.75
192.168.111.76
192.168.111.77
Create backup-masters (this configures the standby HMaster node; here the second node is used)
with the content
192.168.111.76
Edit hbase-site.xml

<property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>
<property>
    <name>hbase.tmp.dir</name>
    <value>./tmp</value>
</property>
<property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
</property>
<property>
    <name>hbase.zookeeper.quorum</name>
    <value>192.168.111.75,192.168.111.76,192.168.111.77</value>
</property>
<property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/opt/zookeeper-3.4.10/zkData</value>
</property>
<property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
</property>
<property>
    <name>hbase.rootdir</name>
    <value>hdfs://192.168.111.75:8020/hbase</value>
</property>
<property>
    <name>hbase.master</name>
    <value>60000</value>
</property>
<property>
    <name>hbase.master.info.port</name>
    <value>60010</value>
</property>

Modify hbase-env.sh:
export JAVA_HOME=…………………………
export HBASE_MANAGES_ZK=false
(use the standalone ZooKeeper cluster)
Then copy the HBase directory recursively to the two worker nodes
scp -r ./hbase-2.4.11 root@slave1:/opt/
scp -r ./hbase-2.4.11 root@slave2:/opt/
Then configure environment variables on all three nodes
vi /etc/profile
export HBASE_HOME=/opt/hbase-2.4.11
export HBASE_CLASSPATH=$HBASE_HOME/lib
export PATH=$PATH:$HBASE_HOME/bin

source /etc/profile
To start HBase, first start ZooKeeper and HDFS, then on the master run
/opt/hbase-2.4.11/bin/start-hbase.sh

http://192.168.111.75:60010/
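Once the web UI is reachable, a quick HBase shell round trip verifies the HMaster and RegionServers; a minimal sketch with an arbitrary table name:
echo "create 'smoke_test','cf'
put 'smoke_test','r1','cf:c1','hello'
scan 'smoke_test'
disable 'smoke_test'
drop 'smoke_test'" | hbase shell    # the scan should return one row before the table is dropped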

12. Spark distributed installation and deployment

First install Scala (on all 3 nodes)
Upload the archive
and unpack it: tar -zxvf
Configure environment variables (vi /etc/profile)
export SCALA_HOME=/opt/scala-2.12.1
export PATH=$SCALA_HOME/bin:$PATH
export SPARK_HOME=/opt/spark-3.0.2
export PATH=$SPARK_HOME/bin:$PATH
export PATH=$SPARK_HOME/sbin:$PATH

:wq
source /etc/profile
Check the version: scala -version

Now install Spark (on the master node)
Unpack the archive: tar -zxvf
Switch to the conf directory
cp spark-env.sh.template spark-env.sh
vi ./spark-env.sh
export SCALA_HOME=/opt/scala-2.12.1
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.332.b09-1.el7_9.x86_64
export HADOOP_HOME=/opt/hadoop-3.2.3
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# specify the Spark master node via the host…
export PATH=$SPARK_HOME/bin:$PATH
export PATH=$SPARK_HOME/sbin:$PATH

source /etc/profile
Start the cluster on the master node
/opt/spark-3.0.2/sbin/start-all.sh

Port 4040 (Spark web UI)

To stop (on the master node): /opt/spark-3.0.2/sbin/stop-all.sh
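A common smoke test is to submit the bundled SparkPi example; a sketch that assumes the master was configured as the host named master on the default standalone port 7077 and the stock Spark 3.0.2 / Scala 2.12 examples jar:
/opt/spark-3.0.2/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://master:7077 \
  /opt/spark-3.0.2/examples/jars/spark-examples_2.12-3.0.2.jar 100    # prints "Pi is roughly 3.14..." on success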

13. Distributed Hive installation and deployment (versions in this section are for reference)

Upload the archive to the master node and unpack it
tar -zxvf hive-1.1.0-cdh5.14.0.tar.gz
cd into Hive's conf directory
cp ./hive-log4j2.properties.template ./hive-log4j2.properties   (no changes needed)
Create hive-site.xml
touch ./hive-site.xml
vi ./hive-site.xml
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://192.168.111.75:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>17696102035</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>192.168.111.75</value>
    </property>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/hive/warehouse</value>
    </property>
    <property>
        <name>hive.metastore.local</name>
        <value>true</value>
    </property>
</configuration>

Copy the MySQL JDBC driver into Hive's lib directory: upload mysql-connector-java-5.1.38.jar to that directory.

Send the whole Hive directory to the other two nodes
scp -r ./hive-2.3.9 root@slave1:/opt/
scp -r ./hive-2.3.9 root@slave2:/opt/
Configure environment variables on all three nodes
vi /etc/profile
export HIVE_HOME=/opt/hive-2.3.9/
export PATH=$PATH:$HIVE_HOME/bin

source /etc/profile

Delete guava-19.0.jar from hive/lib,
then copy hadoop-3.2.3/share/hadoop/common/lib/guava-27.0-jre.jar
into hive/lib.

schematool -dbType mysql -initSchema

Note: this step may fail; if it does, you may need to fix the database password or similar connection settings.
nohup hive --service metastore &
nohup hive --service hiveserver2 &

Run hive to check that you can enter the CLI, then exit:
quit;
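HiveServer2 can also be checked with Beeline; a sketch assuming the default thrift port 10000 and the root user configured above:
beeline -u jdbc:hive2://192.168.111.75:10000 -n root -e "show databases;"    # should list at least the default database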

14. Single-node Azkaban installation and deployment

Set the time zone
tzselect
5
9
1
1
# Copy the time zone file over the local time zone configuration
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
sudo date -s '2022-02-14 17:39:30'

# Create a directory
mkdir -p ~/app/azkaban
# Unpack the SQL scripts archive
tar -zxvf azkaban-sql-script-2.5.0.tar.gz -C ~/app/azkaban/

# Create the user, grant privileges, and run the schema script
# Start MySQL
service mysqld start
# Log in to MySQL
mysql -uroot -p
# Create the azkaban database
CREATE DATABASE azkaban;
# Create the user
CREATE USER 'azkaban'@'%' IDENTIFIED BY 'azkaban';
# Grant privileges
GRANT SELECT,INSERT,UPDATE,DELETE,CREATE,INDEX ON azkaban.* TO 'azkaban'@'%' WITH GRANT OPTION;
# Refresh privileges
flush privileges;
# Exit
quit;

# Log in as the azkaban user
mysql -uazkaban -pazkaban
# List the databases
show databases;
# Switch to the azkaban database
use azkaban;
# Import the schema script
source ~/app/azkaban/azkaban-2.5.0/create-all-sql-2.5.0.sql
# Exit MySQL
quit;

# Unpack the web server archive
tar -zxvf azkaban-web-server-2.5.0.tar.gz -C ~/app/azkaban/
# Check the database version
mysql -V
# Upload the mysql-connector-java that matches your database version (unpack the downloaded archive)
/root/packges/Azkaban
# Copy the driver jar to ~/app/azkaban/azkaban-web-2.5.0/extlib/
cp mysql-connector-java-5.1.47.jar ~/app/azkaban/azkaban-web-2.5.0/extlib/
# Go to
~/app/azkaban/azkaban-web-2.5.0/extlib/ and then run
keytool -keystore keystore -alias jetty -genkey -keyalg RSA
# Fill in the requested certificate information
keytool -importkeystore -srckeystore keystore -destkeystore keystore -deststoretype pkcs12
# You will be asked to set passwords; remember them, they are needed in the configuration below

  1. # Move the keystore to ~/app/azkaban/azkaban-web-2.5.0/
  2. mv keystore ~/app/azkaban/azkaban-web-2.5.0/

# Edit the configuration file (conf/azkaban.properties)
vi /root/app/azkaban/azkaban-web-2.5.0/conf/azkaban.properties
# Configure the following:
azkaban.name=Job Tasks
azkaban.label=mysteel workflow
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=/root/app/azkaban/azkaban-web-2.5.0/web/
default.timezone.id=Asia/Shanghai

user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=/root/app/azkaban/azkaban-web-2.5.0/conf/azkaban-users.xml

database.type=mysql
mysql.port=3306
mysql.host=10.11.1.157
mysql.database=azkaban
mysql.user=azkaban
mysql.password=azkaban
mysql.numconnections=100

# Azkaban Jetty server properties.

jetty.maxThreads=25
jetty.ssl.port=8443
jetty.port=8071
jetty.keystore=/root/app/azkaban/azkaban-web-2.5.0/keystore
jetty.password=<your password>
jetty.keypassword=<your password>
jetty.truststore=/root/app/azkaban/azkaban-web-2.5.0/keystore
jetty.trustpassword=<your password>

Configure user permissions

vi azkaban-users.xml
# The content to configure is as follows

# Unpack the executor server archive
tar -zxvf azkaban-executor-server-2.5.0.tar.gz -C ~/app/azkaban/
# Edit the configuration file (conf/azkaban.properties)
vi /root/app/azkaban/azkaban-executor-2.5.0/conf/azkaban.properties
# Modify the following:
default.timezone.id=Asia/Shanghai

database.type=mysql
mysql.port=3306
mysql.host=10.11.1.157
mysql.database=azkaban
mysql.user=azkaban
mysql.password=azkaban
mysql.numconnections=100

Log in
# Start the database
service mysqld start

# Commands to start and stop Azkaban
~/app/azkaban/azkaban-executor-2.5.0/bin/azkaban-executor-start.sh
~/app/azkaban/azkaban-executor-2.5.0/bin/azkaban-executor-stop-shutdown.sh
~/app/azkaban/azkaban-web-2.5.0/bin/azkaban-web-start.sh
~/app/azkaban/azkaban-web-2.5.0/bin/azkaban-web-shutdown.sh

# Open the following address to log in
https://<your host IP>:8443 (if the login page appears, the installation succeeded)
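As a first workflow, a one-step command job can be packaged and uploaded through the web UI; a minimal sketch with arbitrary file and project names:
mkdir -p ~/azkaban-demo && cd ~/azkaban-demo
cat > hello.job <<'EOF'
# a single command-type job
type=command
command=echo "hello azkaban"
EOF
zip hello.zip hello.job
# create a project in the Azkaban web UI, upload hello.zip, then execute the flow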

15. Zeppelin single-node installation and deployment

cd /opt
Upload the archive and unpack it
tar -zxvf ./zeppelin-0.10.1-bin-all.tgz
mv ./zeppelin-0.10.1-bin-all ./zeppelin-0.10.1
cd /opt/zeppelin-0.10.1/conf/
cp zeppelin-site.xml.template zeppelin-site.xml

<property>
    <name>zeppelin.server.addr</name>
    <value>192.168.111.75</value>
    <description>Server binding address</description>
</property>
<property>
    <name>zeppelin.server.port</name>
    <value>8000</value>
    <description>Server port.</description>
</property>

cp zeppelin-env.sh.template zeppelin-env.sh
vi zeppelin-env.sh
export JAVA_HOME=<your JAVA_HOME path>
export HADOOP_CONF_DIR=<your Hadoop conf directory>
Configure the Hive interpreter
1. Copy hive-site.xml into zeppelin-0.10.1/conf
cp /opt/soft/hive110/conf/hive-site.xml /opt/soft/zeppelin-0.10.1/conf/
2. Copy the following two jars into the interpreter/jdbc directory under the Zeppelin installation.
hadoop-common-3.2.3.jar
hive-jdbc-3.1.2-standalone.jar

cp /opt/hive-3.1.2/jdbc/hive-jdbc-3.1.2-standalone.jar /opt/zeppelin-0.10.1/interpreter/jdbc/
cp /opt/hadoop-3.2.3/share/hadoop/common/hadoop-common-3.2.3.jar /opt/zeppelin-0.10.1/interpreter/jdbc/

Set the jdbc interpreter properties (in the Zeppelin web UI):
default.driver org.apache.hive.jdbc.HiveDriver
default.url jdbc:hive2://192.168.111.75:10000
default.user root
default.password 17696102035

Save

Restart Zeppelin (start the Hive metastore and HiveServer2 first)
/opt/zeppelin-0.10.1/bin/zeppelin-daemon.sh restart
(use start to start and stop to stop)
http://192.168.111.75:8000/

16. Sqoop single-node installation and deployment

Unpack the archive
tar -zxvf ./sqoop-1.4.7.bin__hadoop-2.6.0.tar

Add environment variables
vi /etc/profile
export SQOOP_HOME=/opt/sqoop-1.4.7
export PATH=$SQOOP_HOME/bin:$PATH

export HCAT_HOME=/opt/hive-3.1.2/hcatalog
export PATH=$HCAT_HOME/bin:$PATH

source /etc/profile
Go to Sqoop's conf directory and edit sqoop-env.sh
cd /opt/sqoop-1.4.7/conf/
mv sqoop-env-template.sh sqoop-env.sh

vi ./sqoop-env.sh
#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/opt/hadoop-3.2.3

#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/opt/hadoop-3.2.3

#set the path to where bin/hbase is available
export HBASE_HOME=/opt/hbase-2.4.11

#Set the path to where bin/hive is available
export HIVE_HOME=/opt/hive-3.1.2

#Set the path for where zookeper config dir is
export ZOOCFGDIR=/opt/zookeeper-3.4.10

Copy the MySQL JDBC driver (5.1.49), the Hive JDBC driver, and commons-lang-2.6.jar into Sqoop's lib directory
cp ./mysql-connector-java-5.1.49.jar /opt/sqoop-1.4.7/lib/
cp ./commons-lang-2.6.jar /opt/sqoop-1.4.7/lib/
cp /opt/hive-3.1.2/jdbc/hive-jdbc-3.1.2-standalone.jar /opt/sqoop-1.4.7/lib/
Test whether Sqoop can connect to the database

sqoop list-databases --connect jdbc:mysql://master:3306/ --username root --password root

Demo migration command (full-table export from Hive to MySQL)
sqoop export --connect 'jdbc:mysql://192.168.111.75:3306/sqoop_db_hive?useSSL=false' --username root --password 17696102035 --table dwd_ryry_test --hcatalog-database dwd_test --hcatalog-table dwd_ryry_test -m 1 --fields-terminated-by "\0001"
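The reverse direction (MySQL back to HDFS) follows the same pattern; a sketch reusing the database and table above, with an illustrative target directory:
sqoop import --connect 'jdbc:mysql://192.168.111.75:3306/sqoop_db_hive?useSSL=false' \
  --username root --password 17696102035 \
  --table dwd_ryry_test --target-dir /tmp/dwd_ryry_test_import \
  --fields-terminated-by '\001' -m 1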

Reposted from https://blog.csdn.net/Sparklvzhixin/article/details/125562335; copyright belongs to the original author, 吕道友.
