1 Hardware and Software Preparation
1.1 Server Resources
Hostname | IP | Type | Specs | Notes |
---|---|---|---|---|
k8s-lb1 | 172.20.1.81 | VM | 4C/8GB/50GB | Load balancer, HAProxy + Keepalived |
k8s-lb2 | 172.20.1.82 | VM | 4C/8GB/50GB | Load balancer, HAProxy + Keepalived |
k8s-master1 | 172.20.1.78 | VM | 6C/48GB/50GB+100GB | Kubernetes control-plane node |
k8s-master2 | 172.20.1.79 | VM | 6C/48GB/50GB+100GB | Kubernetes control-plane node |
k8s-master3 | 172.20.1.80 | VM | 6C/48GB/50GB+100GB | Kubernetes control-plane node |
k8s-node1 | 172.20.1.71 | Bare metal | 48C/384GB/480GBx2 RAID1+480GBx4/Tesla P4 | Kubernetes worker + Ceph OSD + GPU |
k8s-node2 | 172.20.1.72 | Bare metal | 48C/384GB/480GBx2 RAID1+480GBx4/Tesla P4 | Kubernetes worker + Ceph OSD + GPU |
k8s-node3 | 172.20.1.73 | Bare metal | 48C/384GB/480GBx2 RAID1+480GBx4/Tesla P4 | Kubernetes worker + Ceph OSD + GPU |
k8s-node4 | 172.20.1.74 | Bare metal | 48C/384GB/480GBx2 RAID1+480GBx4/Tesla P4 | Kubernetes worker + Ceph OSD + GPU |
– | 172.20.1.70 | – | – | Virtual IP (floats between lb1 and lb2) |
– | 172.20.1.200-250 | – | – | Cluster L2 EIP pool |
1.2 Operating System
CentOS 7.9.2009 with the 5.4 LTS kernel
1.3 System Configuration and Parameter Tuning
Fix yum after the CentOS 7 EOL (point the repos at vault.centos.org):
sed -i 's|mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-*
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
# Raise the open file limit
cat >> /etc/security/limits.conf << EOF
* soft nofile 1048576
* hard nofile 1048576
EOF
# Tune kernel parameters
cat >> /etc/sysctl.conf << EOF
fs.file-max= 10485760
vm.max_map_count= 262144
kernel.pid_max = 4194303
vm.swappiness = 0
net.ipv4.ip_local_port_range= 1024 65535
net.ipv4.tcp_mem= 786432 2097152 3145728
net.ipv4.tcp_rmem= 4096 4096 16777216
net.ipv4.tcp_wmem= 4096 4096 16777216
net.ipv4.tcp_max_orphans= 131072
EOF
sysctl -p
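# Optional sanity check (a quick sketch; values assume the settings above were applied):
# confirm a few key parameters. The nofile limit only applies to new login sessions.
sysctl fs.file-max vm.swappiness kernel.pid_max
ulimit -n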
# Disable the firewall
systemctl disable firewalld --now
# Disable NetworkManager
systemctl disable NetworkManager --now
# Disable SELinux
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config
# Disable swap
swapoff -a
sed -i '/swap / s/^\(.*\)$/#\1/g' /etc/fstab
# Reclaim the swap LV and grow the root filesystem
lvremove /dev/mapper/centos-swap
lvextend -l +100%FREE /dev/mapper/centos-root
xfs_growfs /dev/mapper/centos-root
sed -i 's/rd.lvm.lv=centos\/swap//' /etc/default/grub
grub2-mkconfig >/etc/grub2.cfg
# Switch to the Aliyun yum mirror
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
yum makecache
# Update system packages
yum update -y
# Install the lt (long-term support) kernel; depending on NIC support you may need the ml (mainline) kernel instead. Note: after the CentOS EOL in June 2024 the ELRepo kernel packages may have moved to the archive directory; search online for the current location.
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
yum install -y https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm
sed -i 's|mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/elrepo.repo
sed -i 's|elrepo.org/linux|mirrors.aliyun.com/elrepo/archive|g' /etc/yum.repos.d/elrepo.repo
yum --disablerepo=\* --enablerepo=elrepo-kernel repolist
yum --disablerepo=\* --enablerepo=elrepo-kernel install kernel-lt.x86_64 -y
yum remove kernel-tools-libs kernel-tools -y
yum install perl -y
yum --disablerepo=\* --enablerepo=elrepo-kernel install kernel-lt-tools kernel-lt-headers kernel-lt-devel -y
# Make the new kernel the default boot entry
grub2-set-default 0
# Reboot
reboot
rpm -qa kernel*
# Remove the old kernel
yum remove kernel-3.10*
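# Optional check (assumes the kernel-lt package above is a 5.4.x build): confirm the running kernel after the reboot
uname -r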
2 Install Kubernetes (without KubeSphere)
2.1 Load Balancer Setup
- Installation
Run the following on k8s-lb1 and k8s-lb2 to install Keepalived and HAProxy:
yum install keepalived haproxy psmisc chrony -y
- Configure HAProxy
Apply the following configuration on both k8s-lb1 and k8s-lb2:
cp /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg_bak
cat > /etc/haproxy/haproxy.cfg <<EOF
global
log /dev/log local0 warning
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
stats socket /var/lib/haproxy/stats
defaults
log global
option httplog
option dontlognull
timeout connect 5000
timeout client 50000
timeout server 50000
frontend kube-apiserver
bind *:6443
mode tcp
option tcplog
default_backend kube-apiserver
backend kube-apiserver
mode tcp
option tcplog
option tcp-check
balance roundrobin
default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
server kube-apiserver-1 172.20.1.78:6443 check # Replace the IP address with your own.
server kube-apiserver-2 172.20.1.79:6443 check # Replace the IP address with your own.
server kube-apiserver-3 172.20.1.80:6443 check # Replace the IP address with your own.
EOF
systemctl restart haproxy
systemctl enable haproxy
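A quick check (a sketch; it assumes the configuration above was applied unmodified) is to validate the config file and confirm HAProxy is listening on port 6443:
haproxy -c -f /etc/haproxy/haproxy.cfg
ss -lntp | grep 6443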
- Configure Keepalived
On k8s-lb1:
cp /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf_bak
cat > /etc/keepalived/keepalived.conf <<EOF
global_defs {
notification_email {
}
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_script chk_haproxy {
script "killall -0 haproxy"
interval 2
weight 2
}
vrrp_instance haproxy-vip {
state BACKUP
priority 100
interface ens192 # adjust to your NIC name
virtual_router_id 60
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
unicast_src_ip 172.20.1.81 # this node's IP
unicast_peer {
172.20.1.82 # the peer node's IP
}
virtual_ipaddress {
172.20.1.70/24 # virtual IP
}
track_script {
chk_haproxy
}
}
EOF
systemctl restart keepalived
systemctl enable keepalived
On k8s-lb2:
cp /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf_bak
cat > /etc/keepalived/keepalived.conf <<EOF
global_defs {
notification_email {
}
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_script chk_haproxy {
script "killall -0 haproxy"
interval 2
weight 2
}
vrrp_instance haproxy-vip {
state BACKUP
priority 100
interface ens192 # adjust to your NIC name
virtual_router_id 60
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
unicast_src_ip 172.20.1.82 # this node's IP
unicast_peer {
172.20.1.81 # the peer node's IP
}
virtual_ipaddress {
172.20.1.70/24 # virtual IP
}
track_script {
chk_haproxy
}
}
EOF
systemctl restart keepalived
systemctl enable keepalived
- Verification
# Run on both nodes first to see which one currently holds the virtual IP 172.20.1.70
ip add
# On the node holding the VIP, stop HAProxy to simulate a failure
systemctl stop haproxy
# Check on the other node and confirm the VIP has failed over
ip add
2.2 Install KubeKey
Install KubeKey on any node; k8s-master1 is used here.
echo "export KKZONE=cn" >> /etc/profile
source /etc/profile
curl -sfL https://get-kk.kubesphere.io | VERSION=v3.0.13 sh -
chmod +x kk
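A simple way to confirm the download worked (no assumptions beyond the steps above):
./kk version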
2.3 Install Kubernetes
Install the required dependencies on all Kubernetes nodes:
yum install -y conntrack socat ebtables ipset ipvsadm
Run the following on the node where KubeKey was downloaded:
# List the supported Kubernetes versions
./kk version --show-supported-k8s
# Generate the cluster config file (KubeSphere is not included at this stage)
./kk create config --with-kubernetes v1.24.14 -f config-taihuyun.yaml
- Note on the Kubernetes version: v1.23 is the last release that supports the Docker engine. To use nvidia.com/gpu resources the containerd runtime is required, which means using v1.24 or later, or setting containerManager: containerd.
Edit the config file config-taihuyun.yaml:
apiVersion: kubekey.kubesphere.io/v1alpha2
kind: Cluster
metadata:
  name: taihuyun
spec:
  hosts:
  - { name: k8s-master1, address: 172.20.1.78, internalAddress: 172.20.1.78, user: root, password: "xxxx" }
  - { name: k8s-master2, address: 172.20.1.79, internalAddress: 172.20.1.79, user: root, password: "xxxx" }
  - { name: k8s-master3, address: 172.20.1.80, internalAddress: 172.20.1.80, user: root, password: "xxxx" }
  - { name: k8s-worker1, address: 172.20.1.71, internalAddress: 172.20.1.71, user: root, password: "xxxx" }
  - { name: k8s-worker2, address: 172.20.1.72, internalAddress: 172.20.1.72, user: root, password: "xxxx" }
  - { name: k8s-worker3, address: 172.20.1.73, internalAddress: 172.20.1.73, user: root, password: "xxxx" }
  - { name: k8s-worker4, address: 172.20.1.74, internalAddress: 172.20.1.74, user: root, password: "xxxx" }
  roleGroups:
    etcd:
    - k8s-master[1:3]
    control-plane:
    - k8s-master[1:3]
    worker:
    - k8s-worker[1:4]
  controlPlaneEndpoint:
    domain: lb.kubesphere.local
    address: "172.20.1.70"   # virtual IP
    port: 6443
  system:
    ntpServers:
    - ntp.aliyun.com
    - time1.cloud.tencent.com
    - k8s-master1
    timezone: "Asia/Shanghai"
  kubernetes:
    version: v1.23.17
    clusterName: cluster.local
    autoRenewCerts: true
    containerManager: docker
  etcd:
    type: kubekey
  network:
    plugin: calico
    kubePodsCIDR: 10.233.64.0/18
    kubeServiceCIDR: 10.233.0.0/18
    ## multus support. https://github.com/k8snetworkplumbingwg/multus-cni
    multusCNI:
      enabled: false
  registry:
    privateRegistry: ""
    namespaceOverride: ""
    registryMirrors:
    - https://xxxxxxx # provide your own mirror
    insecureRegistries: [ ]
  addons: [ ]
Start the installation:
./kk create cluster -f config-taihuyun.yaml
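Once the installer finishes, a minimal sanity check from k8s-master1 is to confirm that all seven nodes have registered and are Ready, and that the system pods are running:
kubectl get nodes -o wide
kubectl get pods -A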
2.4 Install Rook
- Download the Rook source
yum install git -y
git clone --single-branch --branch v1.15.0 https://github.com/rook/rook.git
- Adjust the configuration
cd rook/deploy/examples
vi operator.yaml
# Replace quay.io with quay.mirrors.enxe.tk and uncomment these lines
ROOK_CSI_CEPH_IMAGE: "quay.mirrors.enxe.tk/cephcsi/cephcsi:v3.12.0"
ROOK_CSIADDONS_IMAGE: "quay.mirrors.enxe.tk/csiaddons/k8s-sidecar:v0.9.0"
# Replace registry.k8s.io with k8s.mirrors.enxe.tk and uncomment these lines
ROOK_CSI_REGISTRAR_IMAGE: "k8s.mirrors.enxe.tk/sig-storage/csi-node-driver-registrar:v2.11.1"
ROOK_CSI_RESIZER_IMAGE: "k8s.mirrors.enxe.tk/sig-storage/csi-resizer:v1.11.1"
ROOK_CSI_PROVISIONER_IMAGE: "k8s.mirrors.enxe.tk/sig-storage/csi-provisioner:v5.0.1"
ROOK_CSI_SNAPSHOTTER_IMAGE: "k8s.mirrors.enxe.tk/sig-storage/csi-snapshotter:v8.0.1"
ROOK_CSI_ATTACHER_IMAGE: "k8s.mirrors.enxe.tk/sig-storage/csi-attacher:v4.6.1"
# Enable device discovery
ROOK_ENABLE_DISCOVERY_DAEMON: "true"
- Deploy the Rook operator
kubectl create -f crds.yaml -f common.yaml -f operator.yaml
watch -n 1 'kubectl -n rook-ceph get pod -o wide'
Before continuing, make sure rook-ceph-operator and all rook-discover pods are in the Running state.
- Deploy the Ceph cluster
vi cluster.yaml
spec:
  dashboard:
    enabled: true
    ssl: false # disable HTTPS on the dashboard; access will go through an Ingress later
# Label the Kubernetes nodes
for i in {1..4}; do kubectl label nodes k8s-worker$i ceph-osd=enabled; done
# Create the Ceph cluster
kubectl create -f cluster.yaml
# Verify the cluster by watching the pods in the rook-ceph namespace; make sure all mgr, mon and osd pods are Running
watch -n 1 'kubectl -n rook-ceph get pod -o wide'
- Install the interactive toolbox
# Replace quay.io with quay.mirrors.enxe.tk
sed -i 's|quay.io|quay.mirrors.enxe.tk|g' toolbox.yaml
# Install
kubectl create -f toolbox.yaml
# Wait for it to become ready
watch -n 1 'kubectl -n rook-ceph rollout status deploy/rook-ceph-tools'
# Enter the toolbox
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash
# Common ceph commands
ceph status
ceph osd status
ceph df
rados df
- Create the CephFS filesystem
kubectl apply -f filesystem.yaml
kubectl apply -f csi/cephfs/storageclass.yaml
kubectl apply -f csi/cephfs/snapshotclass.yaml
- Create the Ceph RBD block storage
kubectl apply -f csi/rbd/storageclass.yaml
kubectl apply -f csi/rbd/snapshotclass.yaml
# Set the default StorageClass
kubectl patch storageclass rook-ceph-block -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
# Check
kubectl get sc
Configure tolerations for the RBD plugin: edit the ds/csi-rbdplugin DaemonSet so it can also run on the master nodes; otherwise the masters cannot mount ceph-rbd PVCs. A quick verification follows the snippet below.
kubectl -n rook-ceph edit ds/csi-rbdplugin
spec:
  template:
    spec:
      tolerations: # add these tolerations
      - key: node-role.kubernetes.io/master
        operator: Exists
        effect: NoSchedule
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
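To confirm the change took effect (a rough check; pod names are generated), verify that csi-rbdplugin pods are now scheduled on the master nodes as well:
kubectl -n rook-ceph get pod -o wide | grep csi-rbdplugin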
- Create the object store
kubectl create -f object.yaml
# Wait for the RGW pod to start
watch -n 1 'kubectl -n rook-ceph get pod -l app=rook-ceph-rgw'
# Create the bucket StorageClass
cat << EOF | kubectl apply -f -
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ceph-bucket
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
provisioner: rook-ceph.ceph.rook.io/bucket
reclaimPolicy: Delete
parameters:
  objectStoreName: my-store
  objectStoreNamespace: rook-ceph
EOF
# Create a test bucket
cat << EOF | kubectl apply -f -
apiVersion: objectbucket.io/v1alpha1
kind: ObjectBucketClaim
metadata:
  name: ceph-bucket-test
spec:
  generateBucketName: test
  storageClassName: rook-ceph-bucket
EOF
# Retrieve the bucket connection parameters with the commands below, then connect with any S3 client
# The ConfigMap, Secret and OBC are created in the default namespace unless a namespace is specified
export AWS_HOST=$(kubectl -n default get cm ceph-bucket-test -o jsonpath='{.data.BUCKET_HOST}')
export PORT=$(kubectl -n default get cm ceph-bucket-test -o jsonpath='{.data.BUCKET_PORT}')
export BUCKET_NAME=$(kubectl -n default get cm ceph-bucket-test -o jsonpath='{.data.BUCKET_NAME}')
export AWS_ACCESS_KEY_ID=$(kubectl -n default get secret ceph-bucket-test -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 --decode)
export AWS_SECRET_ACCESS_KEY=$(kubectl -n default get secret ceph-bucket-test -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 --decode)
echo -e "export AWS_HOST=$AWS_HOST\nexport PORT=$PORT\nexport BUCKET_NAME=$BUCKET_NAME\nexport AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID\nexport AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY"
- Dashboard access
Add an Ingress route pointing at the rook-ceph-mgr-dashboard service. The default user is admin; the password can be retrieved with:
kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{['data']['password']}" | base64 --decode && echo
- Testing
# Create a PVC
kubectl apply -f csi/rbd/pvc.yaml
# Create a pod that mounts the PVC
kubectl apply -f csi/rbd/pod.yaml
# Wait for the pod to be Running
kubectl get pod/csirbd-demo-pod
# Enter the pod
kubectl exec -it pod/csirbd-demo-pod -- bash
# Check the RBD mount
df -h | grep rbd
cd /var/lib/www/html
# Use dd to test read/write performance
dd if=/dev/zero of=./test conv=fsync bs=1M count=300
dd of=/dev/zero if=./test conv=fsync bs=1M count=300
exit
# Delete the test pod and PVC
kubectl delete -f csi/rbd/pod.yaml
kubectl delete -f csi/rbd/pvc.yaml
- Remove a faulty OSD
vi osd-purge.yaml
spec:
  template:
    metadata:
      labels:
        app: rook-ceph-purge-osd
    spec:
      serviceAccountName: rook-ceph-purge-osd
      containers:
      - name: osd-removal
        image: docker.io/rook/ceph:v1.15.0
        # TODO: Insert the OSD ID in the last parameter that is to be removed
        # The OSD IDs are a comma-separated list. For example: "0" or "0,2".
        # If you want to preserve the OSD PVCs, set `--preserve-pvc true`.
        #
        # A --force-osd-removal option is available if the OSD should be destroyed even though the
        # removal could lead to data loss.
        args:
        - "ceph"
        - "osd"
        - "remove"
        - "--preserve-pvc"
        - "false"
        - "--force-osd-removal"
        - "false"
        - "--osd-ids"
        - "16" # change to the ID(s) of the OSD(s) to remove
kubectl apply -f osd-purge.yaml # if creation fails, delete the completed job.batch/rook-ceph-purge-osd first
- Clear the Ceph unhealthy (crash) warnings
# Enter the toolbox
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- /bin/bash
# List crash reports
ceph crash ls
# Archive them
ceph crash archive-all
# Optionally delete all crash logs (retain 0 days)
ceph crash prune 0
- Fix the Cluster Utilization panel showing no data on the dashboard (see the official Rook monitoring docs)
cd rook/deploy/examples/monitoring
kubectl create -f service-monitor.yaml
kubectl create -f exporter-service-monitor.yaml
kubectl create -f prometheus.yaml
kubectl create -f prometheus-service.yaml
watch -n 1 'kubectl -n rook-ceph get pod prometheus-rook-prometheus-0'
kubectl -n rook-ceph edit CephCluster/rook-ceph
Edit the following:
spec:
  dashboard:
    prometheusEndpoint: http://rook-prometheus.rook-ceph.svc.cluster.local:9090
    prometheusEndpointSSLVerify: true
Refresh the dashboard after a short while and the monitoring data will appear.
2.5 Install KubeSphere
Regenerate config-taihuyun.yaml:
./kk create config --with-kubesphere v3.4.1 --with-kubernetes v1.23.17 -f config-taihuyun.yaml
# Edit the generated config so it matches the earlier settings
Install:
./kk create cluster -f config-taihuyun.yaml
# Watch the installation logs
kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l 'app in (ks-install, ks-installer)' -o jsonpath='{.items[0].metadata.name}') -f
2.6 Cluster Configuration
2.6.1 Enable the cluster gateway
Create the cluster gateway with the following settings:
Access mode: LoadBalancer
Load balancer provider: OpenELB
Annotations:
'eip.openelb.kubesphere.io/v1alpha2': eip-pool
'lb.kubesphere.io/v1alpha1': openelb
'protocol.openelb.kubesphere.io/v1alpha1': layer2
Configuration options:
use-forwarded-headers: true # records the client's real IP
2.6.2 Raise the pod limit on worker nodes
Run on all worker nodes:
vi /var/lib/kubelet/config.yaml
# Set the following values:
# maxPods: 1100
# podPidsLimit: 100000
# Restart kubelet to apply; the restart does not disrupt the running cluster
systemctl restart kubelet
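To confirm the new limit took effect (assuming maxPods was raised to 1100 as above), check the node capacity from a control-plane node:
kubectl describe node k8s-worker1 | grep -i pods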
2.6.3 Install SSL certificate management
Install cert-manager:
helm repo add jetstack https://charts.jetstack.io
helm repo update
helm install \
cert-manager jetstack/cert-manager \
--namespace cert-manager \
--create-namespace \
--version v1.12.13 \
--set installCRDs=true
Install the Alibaba Cloud DNS (alidns) webhook:
# Install alidns-webhook to cert-manager namespace.
wget -O alidns-webhook.yaml https://raw.githubusercontent.com/pragkent/alidns-webhook/master/deploy/bundle.yaml
sed -i 's/yourcompany.com/xxx.com/' alidns-webhook.yaml
kubectl apply -f alidns-webhook.yaml
export base64_access_key=$(echo -n "xxx"|base64)
export base64_secret_key=$(echo -n "xxx"|base64)
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: Secret
metadata:
  name: alidns-secret
  namespace: cert-manager
data:
  access-key: ${base64_access_key}
  secret-key: ${base64_secret_key}
EOF
export email=xxx@xxx.com
export group_name=acme.xxx.com
# Create the Issuer/ClusterIssuer
cat << EOF | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    email: ${email}
    server: https://acme-v02.api.letsencrypt.org/directory
    privateKeySecretRef:
      name: letsencrypt-prod
    solvers:
    - dns01:
        webhook:
          groupName: ${group_name}
          solverName: alidns
          config:
            region: ""
            accessKeySecretRef:
              name: alidns-secret
              key: access-key
            secretKeySecretRef:
              name: alidns-secret
              key: secret-key
EOF
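Before requesting certificates it is worth checking that the ClusterIssuer has registered with the ACME server (READY should become True after a short while):
kubectl get clusterissuer letsencrypt-prod
kubectl describe clusterissuer letsencrypt-prod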
Certificate request instructions (kept in-cluster as a reference ConfigMap):
cat << EOFF | kubectl apply -f -
kind: ConfigMap
apiVersion: v1
metadata:
  name: readme
  namespace: cert-manager
  annotations:
    kubesphere.io/description: How to request a certificate
data:
  readme: |-
    namespace=<project (namespace) name>
    domain=<domain to request a certificate for>
    domain_name=\$(echo \${domain} | sed 's/\./-/g')
    # Create a Certificate in the project (namespace) that needs it; kubesphere-system is used as an example
    cat <<EOF | kubectl apply -f -
    apiVersion: cert-manager.io/v1
    kind: Certificate
    metadata:
      name: cert-\${domain_name}
      namespace: \${namespace}
    spec:
      secretName: tls-\${domain_name}
      commonName:
      dnsNames:
      - "*.\${domain}"
      issuerRef:
        name: letsencrypt-prod
        kind: ClusterIssuer
    EOF
    # Wait a few minutes, then check the certificate READY status
    watch -n 1 "kubectl get certificate -n \${namespace}"
EOFF
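For reference, a minimal sketch of an Ingress that terminates TLS with a secret issued this way. The names app-ingress, app-svc and example.com are placeholders, and the nginx ingress class is an assumption (the gateway created in 2.6.1); adjust to your environment:
cat << EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: app-ingress
  namespace: kubesphere-system   # same namespace as the Certificate/secret
spec:
  ingressClassName: nginx
  tls:
  - hosts:
    - app.example.com
    secretName: tls-example-com   # the secretName from the Certificate
  rules:
  - host: app.example.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: app-svc
            port:
              number: 80
EOF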
2.6.4 Enable load balancing in Layer 2 mode
Install OpenELB:
wget https://raw.githubusercontent.com/openelb/openelb/release-0.6/deploy/openelb.yaml
sed -i 's|registry.k8s.io/ingress-nginx/kube-webhook-certgen:v1.1.1|kubesphere/kube-webhook-certgen:v1.1.1|g' openelb.yaml
kubectl apply -f openelb.yaml
# Check the OpenELB installation; make sure all pods are Running before continuing
watch -n 1 'kubectl get po -n openelb-system'
First enable strictARP for kube-proxy, so that NICs in the cluster stop answering ARP requests for addresses they do not own and OpenELB handles ARP instead.
kubectl edit configmap kube-proxy -n kube-system
ipvs:
  strictARP: true
# Restart kube-proxy
kubectl rollout restart daemonset kube-proxy -n kube-system
Make sure Layer 2 mode is enabled:
kubectl edit ds -n openelb-system openelb-speaker
containers:
- command:
  - openelb-speaker
  args:
  - --api-hosts=:50051
  - --enable-layer2=true # change to true
Create the EIP pool:
cat << EOF | kubectl apply -f -
apiVersion: network.kubesphere.io/v1alpha2
kind: Eip
metadata:
  name: eip-pool
spec:
  address: 172.20.1.200-172.20.1.250
  protocol: layer2
  disable: false
  interface: can_reach:172.20.1.254
EOF
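As a usage sketch (the Service name demo and its selector are placeholders; the annotations mirror those used for the cluster gateway in 2.6.1), a LoadBalancer Service obtains an address from eip-pool like this:
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: Service
metadata:
  name: demo
  annotations:
    eip.openelb.kubesphere.io/v1alpha2: eip-pool
    lb.kubesphere.io/v1alpha1: openelb
    protocol.openelb.kubesphere.io/v1alpha1: layer2
spec:
  type: LoadBalancer
  selector:
    app: demo
  ports:
  - port: 80
    targetPort: 8080
EOF
kubectl get svc demo   # EXTERNAL-IP should come from 172.20.1.200-250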
3 Appendix
3.1 Create and mount a partition
# Find unpartitioned disks
fdisk -l
# Assume the unpartitioned disk is /dev/sdb
fdisk /dev/sdb # enter n, then p, accept the defaults, and finally w to write the partition table
# Format
mkfs.xfs /dev/sdb1
# Mount
mkdir -p /data
mount /dev/sdb1 /data
# Look up the partition UUID
blkid | grep /dev/sdb1
# Mount automatically at boot
uuid=$(blkid | grep /dev/sdb1 | awk -F'"' '{print $2}')
echo "UUID=${uuid} /data xfs defaults 0 0" >> /etc/fstab
3.2 Store docker and kubelet data on a separate disk
Before installation:
mkdir -p /data/docker
mkdir -p /data/kubelet
ln -s /data/docker /var/lib/docker
ln -s /data/kubelet /var/lib/kubelet
If docker and kubelet are already installed and running, stop them first, copy their data directories under /data, recreate the symlinks, and then restart the services.
3.3 Enable kubectl command-line completion
yum install -y bash-completion
echo 'source <(kubectl completion bash)' >>~/.bashrc
kubectl completion bash >/etc/bash_completion.d/kubectl
exit # log out and back in for completion to take effect
3.4 Add a local-path storage class
kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.29/deploy/local-path-storage.yaml
3.5 Fix the missing RPM-GPG-KEY-EPEL-7 error
cd /etc/pki/rpm-gpg
wget https://archive.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7
3.6 GPU installation
- Upgrade system components
# Upgrade GCC
yum install -y gcc gcc-c++ bzip2
wget https://gcc.gnu.org/pub/gcc/releases/gcc-9.3.0/gcc-9.3.0.tar.gz
tar -xzvf gcc-9.3.0.tar.gz
mkdir gcc-build-9.3.0
mkdir /usr/lib/gcc/x86_64-redhat-linux/9.3.0
cd gcc-9.3.0
./contrib/download_prerequisites
cd ../gcc-build-9.3.0
../gcc-9.3.0/configure --prefix=/usr/lib/gcc/x86_64-redhat-linux/9.3.0/ --enable-checking=release --enable-languages=c,c++ --disable-multilib
make -j42 && make install
# Configure environment variables
cat > /etc/profile.d/gcc-9.3.0.sh << EOF
export PATH=\$PATH:/usr/lib/gcc/x86_64-redhat-linux/9.3.0/bin
EOF
source /etc/profile
# Replace the existing gcc with the newly built one
gcc --version
whereis gcc
mv /usr/bin/gcc /usr/bin/gcc-4.8.5
ln -s /usr/lib/gcc/x86_64-redhat-linux/9.3.0/bin/gcc /usr/bin/
whereis c++
mv /usr/bin/c++ /usr/bin/c++-4.8.5
ln -s /usr/lib/gcc/x86_64-redhat-linux/9.3.0/bin/c++ /usr/bin/
find / -name "libstdc++.so*"
strings /usr/lib64/libstdc++.so.6 | grep GLIBC
cp /usr/lib/gcc/x86_64-redhat-linux/9.3.0/lib64/libstdc++.so.6.0.28 /usr/lib64
cd /usr/lib64/
rm -rf libstdc++.so.6
ln -s libstdc++.so.6.0.28 libstdc++.so.6
# Install Python 3
cd ~/nvidia-driver-install
yum -y install python3
# Upgrade GDB
yum -y install ncurses-devel texinfo
wget http://ftp.nluug.nl/gnu/gdb/gdb-7.8.2.tar.gz
tar -zxvf gdb-7.8.2.tar.gz
cd gdb-7.8.2/
mkdir -p build
cd build
../configure
make -j40 && make install
mv /usr/bin/gdb /usr/bin/gdb.7.6.1
ln -s /usr/local/bin/gdb /usr/bin/gdb
gdb --version
# Upgrade Make
cd ~/nvidia-driver-install
wget https://mirrors.aliyun.com/gnu/make/make-4.3.tar.gz
tar -zxf make-4.3.tar.gz
cd make-4.3/
mkdir -p build && cd build
../configure --prefix=/usr
make -j16 && make install
make -v
# Install glibc. The NVIDIA Container Toolkit requires glibc 2.27 (newer glibc is backward compatible, so 2.31 is installed here). Be very careful: a mistake in this step can leave the system unbootable and unable to run any command. If something goes wrong, do NOT disconnect SSH and do NOT reboot; search online for a fix first.
cd ~/nvidia-driver-install
wget https://mirrors.aliyun.com/gnu/glibc/glibc-2.31.tar.gz
tar -zxvf glibc-2.31.tar.gz
cd glibc-2.31
# Confirm the build dependencies (normally already installed in the steps above)
cat INSTALL | grep -E "newer|later"
python3 --version
gdb --version | grep "GNU gdb"
make --version | grep "GNU Make"
gcc --version | grep GCC
yum install -y binutils texinfo bison sed gettext gettext-devel autoconf libtool
# Patch the install check to skip the nss_test2 test
sed -i.bak 's|nss_test1|nss_test2" \&\& $name ne "nss_test1|g' scripts/test-installation.pl
mkdir -p build && cd build
../configure --prefix=/usr --disable-profile --enable-add-ons --with-headers=/usr/include --with-binutils=/usr/bin --disable-sanity-checks --disable-werror
make -j42 && make install
# make install reports the following error, which appears to be harmless
make[1]: *** [Makefile:120: install] Error 1
make[1]: Leaving directory '/root/nvidia-driver-install/glibc-2.31'
make: *** [Makefile:12: install] Error 2
# Install the locale data (do not skip this)
make -j32 localedata/install-locales
# Finally, verify the glibc versions supported
ldd --version
strings /lib64/libc.so.6 | grep GLIBC
# Reboot to apply
reboot
- Install the driver
wget https://cn.download.nvidia.com/tesla/550.90.12/NVIDIA-Linux-x86_64-550.90.12.run
chmod +x NVIDIA-Linux-x86_64-550.90.12.run
./NVIDIA-Linux-x86_64-550.90.12.run
# Verify
nvidia-smi
- Install the NVIDIA Container Toolkit
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
tee /etc/yum.repos.d/nvidia-container-toolkit.repo
yum install -y nvidia-container-toolkit
# Configure Docker (optional)
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker
# Configure containerd
nvidia-ctk runtime configure --runtime=containerd
systemctl restart containerd
# Run on a master node
helm repo add nvidia https://helm.ngc.nvidia.com/nvidia \
&& helm repo update
# NVIDIA does not provide a driver image for CentOS 7 with the 5.4 kernel (building one was attempted and failed, hence the manually installed driver), so driver and toolkit management are disabled in the operator
helm install --wait --generate-name \
-n gpu-operator --create-namespace \
nvidia/gpu-operator \
--set driver.enabled=false \
--set toolkit.enabled=false
Then just wait. (The upstream Kubernetes docs point to k8s-device-plugin instead, but that approach is considerably more involved.) A rough verification is sketched below.
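Once the chart is installed (a sketch assuming the worker node names used above), watch the operator pods come up and then confirm the GPU is advertised by the node:
watch -n 5 'kubectl -n gpu-operator get pods'
kubectl describe node k8s-worker1 | grep -i 'nvidia.com/gpu'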
- GPU resource sharing
After the steps above, workloads in a project can use GPU resources by adding the following to the workload spec:
resources:
  limits:
    nvidia.com/gpu: '1'
However, unlike CPU and memory, the GPU resource only accepts integer values, so a Pod always occupies at least one whole card, which wastes resources for inference workloads. It is therefore worth configuring GPU sharing. NVIDIA supports two approaches: Multi-Instance GPU (MIG) and time slicing (TimeSlicing). MIG is only available on certain GPU models, and the salvaged Tesla P4 cards used here do not support it, so only time slicing is covered. A smoke-test Pod for basic GPU allocation is sketched right after the references.
References: the official GPU Operator documentation and the article 一文搞懂 GPU 共享方案: NVIDIA Time Slicing.
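Before enabling time slicing, a minimal smoke-test Pod can confirm GPU scheduling works. This is a sketch: the name gpu-smoke-test is arbitrary and the CUDA image tag is an assumption, so pick one compatible with the installed 550 driver. If nvidia-smi is not found inside the container, the containerd default runtime may need to be nvidia (nvidia-ctk runtime configure --runtime=containerd --set-as-default) or the Pod may need runtimeClassName: nvidia.
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: gpu-smoke-test
spec:
  restartPolicy: Never
  containers:
  - name: cuda
    image: nvidia/cuda:12.4.1-base-ubuntu22.04
    command: ["nvidia-smi"]
    resources:
      limits:
        nvidia.com/gpu: "1"
EOF
kubectl logs -f pod/gpu-smoke-test
kubectl delete pod gpu-smoke-test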
- Create a ConfigMap for the time-slicing configuration
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: time-slicing-config-fine
  namespace: gpu-operator
data:
  tesla-p4: |-
    version: v1
    flags:
      migStrategy: none
    sharing:
      timeSlicing:
        resources:
        - name: nvidia.com/gpu
          replicas: 8
EOF
- Configure the device plugin with the ConfigMap and set it as the default time-slicing configuration
kubectl patch clusterpolicies.nvidia.com/cluster-policy \
-n gpu-operator --type merge \
-p '{"spec": {"devicePlugin": {"config": {"name": "time-slicing-config-fine"}}}}'
- Confirm that the gpu-feature-discovery and nvidia-device-plugin-daemonset pods restart
kubectl get events -n gpu-operator --sort-by='.lastTimestamp'
The gpu-feature-discovery and nvidia-device-plugin-daemonset pods can also be checked on the KubeSphere console.
- Label the GPU nodes
kubectl label node \
--selector=nvidia.com/gpu.product=Tesla-P4 \
nvidia.com/device-plugin.config=tesla-p4
- Verify the GPU time-slicing configuration
kubectl describe node k8s-worker1
kubectl describe node k8s-worker1 | grep Capacity -A 7
Expected output:
Capacity:
cpu: 48
ephemeral-storage: 467039428Ki
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 396202544Ki
nvidia.com/gpu: 8
pods: 1100
Check the GPU-related entries under Labels and Capacity; the number of allocatable GPUs = physical GPUs × time-slicing replicas.
3.7 Install the nerdctl CLI
nerdctl is a Docker-compatible CLI for containerd.
wget https://github.com/containerd/nerdctl/releases/download/v1.7.7/nerdctl-1.7.7-linux-amd64.tar.gz
mkdir -p nerdctl && tar zxvf nerdctl-1.7.7-linux-amd64.tar.gz -C nerdctl
mv nerdctl/nerdctl /usr/bin/
# List images as a test; note that nerdctl is namespace-aware, unlike docker
nerdctl images --namespace k8s.io
# Add an alias for day-to-day use
echo "alias k8sctl='nerdctl --namespace k8s.io'" >> ~/.bashrc
source ~/.bashrc
3.8 KubeSphere plugins
Recommended to enable:
- Logging
logging:
  enabled: true # Change "false" to "true".
  containerruntime: containerd
es: # Storage backend for logging, tracing, events and auditing.
  elasticsearchMasterReplicas: 1 # The total number of master nodes. Even numbers are not allowed.
  elasticsearchDataReplicas: 1 # The total number of data nodes.
  elasticsearchMasterVolumeSize: 4Gi # The volume size of Elasticsearch master nodes.
  elasticsearchDataVolumeSize: 20Gi # The volume size of Elasticsearch data nodes.
  logMaxAge: 7 # Log retention day in built-in Elasticsearch. It is 7 days by default.
  elkPrefix: logstash # The string making up index names. The index name will be formatted as ks-<elk_prefix>-log.
  externalElasticsearchHost: # The Host of external Elasticsearch.
  externalElasticsearchPort: # The port of external Elasticsearch.
- Events
events:
  enabled: true # Change "false" to "true".
- Alerting
alerting:
  enabled: true # Change "false" to "true".
- Auditing
auditing:
  enabled: true # Change "false" to "true".
Watch the installation progress:
kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l 'app in (ks-install, ks-installer)' -o jsonpath='{.items[0].metadata.name}') -f