Rocky Linux9.3 部署安装kubernetes 1.28.5+docker-ce+calico

一、成员角色


主机

配置

IP

k8s-master

6C4M

172.16.1.240

k8s-node01

6C4M

172.16.1.239

k8s-node02

6C4M

172.16.1.238


二、OS系统初始化

1. (所有主机操作) 关闭或者放行指定端口防火墙(这里采用关闭方式)

systemctl disable --now firewalld.service  #关闭并禁止开机自启

	Removed "/etc/systemd/system/multi-user.target.wants/firewalld.service".
	Removed "/etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service".

systemctl status firewalld.service   #查看状态

    firewalld.service - firewalld - dynamic firewall daemon
      Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; preset: enabled)
      Active: inactive (dead)
        Docs: man:firewalld(1)

2. (所有主机操作)关闭SELinux

sed -ri 's#(SELINUX=)enforcing#\1disabled#g' /etc/selinux/config #永久关闭

setenforce 0  #临时关闭selinux
getenforce    #查看状态
Permissive

3. (所有主机操作) 配置hosts解析

cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
172.16.1.240 k8s-master
172.16.1.239 k8s-node01
172.16.1.238 k8s-node02

4.(任意单节点操作)此处在master节点进行配置ssh免密(可选)

dnf install sshpass -y #安装sshpass

ssh-keygen -P '' -q  -t rsa -f .ssh/id_rsa  #静默生成秘钥 
Password=YOURPASSWORD
for i in `awk -F"[ ]+" '/k8s/{print $0}' /etc/hosts`; do sshpass -p $Password ssh-copy-id -o StrictHostKeyChecking=no root@$i ;done #复制公钥到所有节点

for (( i=238;i<=240;i++ )); do scp  -r $HOME/.ssh/* root@172.16.1.$i:$HOME/.ssh/;done  #复制整个.ssh下文件到所有节点

5.(所有主机操作)配置时间同步

dnf install chrony -y #安装时间同步(rocky 自带)

vim /etc/chrony.conf #编辑配置文件,加入以下内容

	pool ntp1.aliyun.com iburst
	pool ntp2.aliyun.com iburst
	pool cn.pool.ntp.org iburst

systemctl enable --now chronyd #配置开机自启

chronyc sources   #测试

	MS Name/IP address         Stratum Poll Reach LastRx Last sample               
	===============================================================================
	^* ntp1.flashdance.cx            2  10   377     2   -184us[ +692us] +/-   81ms
	^+ ntp6.flashdance.cx            2  10   377   786   -649us[ +235us] +/-   84ms
	^+ 2001:67c:21bc:1e::123         2  10   377   130    -23ms[  -22ms] +/-  180ms
	^+ tick.ntp.infomaniak.ch        1  10   377   899    +16ms[  +17ms] +/-  103ms

6. (所有主机操作)禁用swap分区

swapoff -a  #临时禁用

sed -i 's/.*swap.*/#&/' /etc/fstab #永久禁用

free -m   #检测

            	   total        used        free      shared  buff/cache   available
	Mem:            3654         465        1936          22        1517        3188
	Swap:              0           0           0

7. (所有主机操作)修改内核参数

cat >> /etc/sysctl.d/k8s.conf << EOF

#内核参数调整
vm.swappiness=0 
#配置iptables参数,使得流经网桥的流量也经过iptables/netfilter防火墙
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF


#加载网桥过滤模块
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

modprobe overlay
modprobe br_netfilter 
 
#重新加载
sysctl --system   # 或者执行: sysctl -p /etc/sysctl.d/k8s.conf

#检测
sysctl -a 
	vm.swappiness = 0
	net.bridge.bridge-nf-call-ip6tables = 1
	net.bridge.bridge-nf-call-iptables = 1
	net.ipv4.ip_forward = 1

#返回如下表示加载成功
lsmod | grep br_netfilter 
br_netfilter           32768  0
bridge                303104  1 br_netfilter

8.(所有节点操作)配置ipvs功能


dnf install ipset ipvsadm -y  #安装ipset和ipvsadm
 
# 添加需要加载的模块写入脚本文件
cat <<EOF | sudo tee /etc/modules-load.d/ipvs.conf
overlay
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF

modprobe overlay
modprobe ip_vs && modprobe ip_vs_rr && modprobe ip_vs_wrr && modprobe ip_vs_sh && modprobe nf_conntrack
 
#查看模块是否加载成功
 lsmod | grep -e ip_vs -e nf_conntrack_ipv4
ip_vs_sh               16384  0
ip_vs_wrr              16384  0
ip_vs_rr               16384  0
ip_vs                 188416  6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack          176128  3 nf_nat,nft_ct,ip_vs
nf_defrag_ipv6         24576  2 nf_conntrack,ip_vs
libcrc32c              16384  5 nf_conntrack,nf_nat,nf_tables,xfs,ip_vs

三、配置安装docker-ce(所有节点操作)

1. 配置yum仓库

传送门

阿里云镜像站地址: docker-ce镜像_docker-ce下载地址_docker-ce安装教程-阿里巴巴开源镜像站 (aliyun.com)

dnf config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo  #添加阿里云docker仓库

sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo

dnf makecache #更新源数据

2. 安装docker-ce

dnf list docker-ce.x86_64 --showduplicates | sort -r #列出docker版本

dnf install docker-ce -y #此处安装最新版本

3. 修改配置文件,并启动docker

#修改配置文件
cat >> /etc/docker/daemon.json <<-EOF
{
"registry-mirrors": [
    "https://registry.docker-cn.com" 
 ],
 "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF

systemctl daemon-reload  #重载systemctl

systemctl enable --now docker   #启动docker并设置开机自启

四、cri-dockerd环境操作(所有节点操作)

传送门

地址:Releases · Mirantis/cri-dockerd (github.com)

1. 下载cri-dockerd

wget -c https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.9/cri-dockerd-0.3.9.amd64.tgz

2. 解压cri-dockerd到指定位置

tar -xvf cri-dockerd-0.3.9.amd64.tgz  --strip-components=1 -C /usr/local/bin/ #解压缩到指定目录

3. 定制配置文件和socket文件

获取Service文件:

wget -O /etc/systemd/system/cri-docker.service https://raw.githubusercontent.com/Mirantis/cri-dockerd/master/packaging/systemd/cri-docker.service
wget -O /etc/systemd/system/cri-docker.socket https://raw.githubusercontent.com/Mirantis/cri-dockerd/master/packaging/systemd/cri-docker.socket

编辑 /etc/systemd/system/cri-docker.service,修改ExecStart=/usr/bin/cri-dockerd 行内容为

ExecStart=/usr/local/bin/cri-dockerd --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.9 --container-runtime-endpoint=unix:///var/run/cri-dockerd.sock --cri-dockerd-root-directory=/var/lib/dockershim --cri-dockerd-root-directory=/var/lib/docker

cat > /etc/systemd/system/cri-docker.service<<-EOF
[Unit]
Description=CRI Interface for Docker Application Container Engine
Documentation=https://docs.mirantis.com
After=network-online.target firewalld.service docker.service
Wants=network-online.target
 
[Service]
Type=notify
ExecStart=/usr/local/bin/cri-dockerd --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.9 --network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin --container-runtime-endpoint=unix:///var/run/cri-dockerd.sock --cri-dockerd-root-directory=/var/lib/dockershim --docker-endpoint=unix:///var/run/docker.sock --cri-dockerd-root-directory=/var/lib/docker
ExecReload=/bin/kill -s HUP \$MAINPID
TimeoutSec=0
RestartSec=2
Restart=always
StartLimitBurst=3
StartLimitInterval=60s
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
Delegate=yes
KillMode=process
[Install]
WantedBy=multi-user.target
EOF
 
cat > /etc/systemd/system/cri-docker.socket <<-EOF
[Unit]
Description=CRI Docker Socket for the API
PartOf=cri-docker.service
[Socket]
ListenStream=/var/run/cri-dockerd.sock
SocketMode=0660
SocketUser=root
SocketGroup=docker
[Install]
WantedBy=sockets.target
EOF

4. 启动cri-dockerd并设置为开机自启

systemctl daemon-reload

systemctl enable --now cri-docker.service  #启动并设置开机自启

systemctl status  cri-docker.service  #检查状态显示如下

● cri-docker.service - CRI Interface for Docker Application Container Engine
     Loaded: loaded (/etc/systemd/system/cri-docker.service; enabled; preset: disabled)
     Active: active (running) since Tue 2024-01-09 15:51:04 CST; 10s ago
	 ...

五、k8s集群安装初始化

1. 配置源(所有节点操作)

传送门

kubernetes镜像_kubernetes下载地址_kubernetes安装教程-阿里巴巴开源镜像站 (aliyun.com)

cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/repodata/repomd.xml.key
EOF

2. 安装kubeadm、kubectl、kubelet(所有节点操作)

dnf install -y kubelet kubeadm kubectl #安装

systemctl enable --now  kubelet #启动kubelet 并设置开机自启

kubeadm config images list  --image-repository=registry.aliyuncs.com/google_containers  #查看拉取的镜像 

kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers  --cri-socket unix:///var/run/cri-dockerd.sock  #可使用此命令加上仓库地址拉取镜像 --cri-socket 指定拉取时使用的容器运行时

I0110 12:23:54.681105   85519 version.go:256] remote version is much newer: v1.29.0; falling back to: stable-1.28
registry.k8s.io/kube-apiserver:v1.28.5
registry.k8s.io/kube-controller-manager:v1.28.5
registry.k8s.io/kube-scheduler:v1.28.5
registry.k8s.io/kube-proxy:v1.28.5
registry.k8s.io/pause:3.9
registry.k8s.io/etcd:3.5.9-0
registry.k8s.io/coredns/coredns:v1.10.1

3. 初始化kubernetes(master节点操作)

# 初始化集群
kubeadm init --kubernetes-version=1.28.5 \
--apiserver-advertise-address=172.16.1.240 \
--image-repository  registry.aliyuncs.com/google_containers \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=Swap \
--cri-socket=unix:///var/run/cri-dockerd.sock


# 当显示以下内容时说明初始化完成(kubeadm join ..... 这一段需要记录下来,后续用于从节点加入集群)
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 172.16.1.240:6443 --token xxxxxxxxxxxxxxxxxxxxx \
        --discovery-token-ca-cert-hash sha256:xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

可选参数

说明

--apiserver-advertise-address

指定API Server地址

--apiserver-bind-port

指定绑定的API Server端口,默认值为6443

--apiserver-cert-extra-sans

为API Server的服务器证书指定额外的SAN(Subject Alternative Name,可以是IP地址或DNS名称)

--cert-dir

指定证书的路径

--dry-run

输出将要执行的操作,不做任何改变

--feature-gates

指定功能配置键值对,可控制是否启用各种功能

-h, --help

输出init命令的帮助信息

--ignore-preflight-errors

忽视检查项错误列表,例如“IsPrivilegedUser,Swap”,如填写为 'all' 则将忽视所有的检查项错误

--kubernetes-version

指定Kubernetes版本

--node-name

指定节点名称

--pod-network-cidr

指定pod网络IP地址段

--service-cidr

指定service的IP地址段

--service-dns-domain

指定Service的域名,默认为“cluster.local”

--skip-token-print

不打印Token

--token

指定token

--token-ttl

指定token有效时间,如果设置为“0”,则永不过期

--image-repository

指定镜像仓库地址,默认为"registry.k8s.io"

4.定制k8s的配置文件(master节点操作)

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config


# 查看主节点状态
kubectl get componentstatuses 

NAME                 STATUS    MESSAGE   ERROR
scheduler            Healthy   ok        
controller-manager   Healthy   ok        
etcd-0               Healthy   ok   
扩展: kubectl在node节点上运行
scp  -r  $HOME/.kube   k8s-node01:$HOME/
scp  -r  $HOME/.kube   k8s-node02:$HOME/

5. 配置命令补全(master节点操作)

# 设置kubectl命令补齐功能
echo "source <(kubectl completion bash)" >> ~/.bashrc
echo "source <(kubeadm completion bash)" >> ~/.bashrc
source ~/.bashrc

6. 从节点加入集群(所有从节点操作)

默认的 join token 有效期限为24小时,超过时间后需要重新创建token

使用cri-dockerd 时,需要在 kubeadm join 命令后追加 --cri-socket=unix:///var/run/cri-dockerd.sock

# 从节点加入
kubeadm join 172.16.1.240:6443 --token w8qx09.tvql7awcclxgs80z --discovery-token-ca-cert-hash sha256:cb3f09591298584b8c3ca9134995eca3effb73942c57aeb20db133ea4dbe9eab --cri-socket=unix:///var/run/cri-dockerd.sock 

kubectl get nodes  # 主节点使用命令查看是否加入
NAME         STATUS     ROLES           AGE   VERSION
k8s-master   NotReady   control-plane   5m   v1.28.5
k8s-node01   NotReady   <none>          2s   v1.28.5
k8s-node02   NotReady   <none>          2s    v1.28.5

7. 部署集群网络插件(可选flannel或calico),这里使用calico(master节点操作)

flannel 传送门: GitHub - flannel-io/flannel: flannel is a network fabric for containers, designed for Kubernetes

calico 传送门: Quickstart for Calico on Kubernetes | Calico Documentation (tigera.io)

#安装 Tigera Calico Operator 和自定义资源定义(CRD) (此命令会创建一个tigera-operator的命名空间,可以使用kubectl get namespaces 查看 )
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.4/manifests/tigera-operator.yaml

#查看tigera-operator内pod有没有创建成功
kubectl get pods -n tigera-operator

NAME                               READY   STATUS    RESTARTS   AGE
tigera-operator-7f8cd97876-24tkd   1/1     Running   0          119s



#通过创建必要的自定义资源来安装 Calico (注意修改默认 IP 池 CIDR 以匹配您的容器网络 CIDR)

wget -c https://raw.githubusercontent.com/projectcalico/calico/v3.26.4/manifests/custom-resources.yaml

#编辑文件修改CIDR网段与kubeadm初始化时的ip网段一致 "--pod-network-cidr=10.244.0.0/16 使用10.244.0.0/16"

vim custom-resources.yaml 
...
cidr: 192.168.0.0/16 
修改为
cidr: 10.244.0.0/16
...

#创建
kubectl apply -f custom-resources.yaml 

#验证是否创建了calico-system命名空间(如果没有创建,可能是网段修改不正确)
kubectl get ns 
NAME              STATUS        AGE
calico-system     Terminating   46s
default           Active        15m
kube-node-lease   Active        15m
kube-public       Active        15m
kube-system       Active        15m
tigera-operator   Active        15m

#稍等片刻后查看pod是否创建完成,时间取决于自身网络环境
kubectl get pod -n calico-system  

NAME                                       READY   STATUS    RESTARTS   AGE
calico-kube-controllers-74dc869fd7-xz7gf   1/1     Running   0          5m50s
calico-node-c62dp                          1/1     Running   0          5m50s  
calico-node-glqbn                          1/1     Running   0          5m50s
calico-node-nh9xc                          1/1     Running   0          5m50s
calico-typha-6496d858d6-4bd9v              1/1     Running   0          5m42s
calico-typha-6496d858d6-vrwwm              1/1     Running   0          5m51s
csi-node-driver-2hn9x                      2/2     Running   0          5m50s
csi-node-driver-cp985                      2/2     Running   0          5m50s
csi-node-driver-wvwrq                      2/2     Running   0          5m50s

#注意: 若是calico-node READY 列是0/1 可以使用 kubectl describe pod  pod名称 -n calico-system 查看具体原因



#验证集群节点是否ready
kubectl get nodes

NAME         STATUS   ROLES           AGE    VERSION
k8s-master   Ready    control-plane   16m   v1.28.5
k8s-node01   Ready    <none>          13m   v1.28.5
k8s-node02   Ready    <none>          13m   v1.28.5

8. k8s使用ipvs(master节点操作)

#编辑kube-proxy的configmap,在54行左右修改mode值为ipvs

kubectl edit configmaps kube-proxy  -n kube-system
...
37     ipvs:
 38       excludeCIDRs: null
 39       minSyncPeriod: 0s
 40       scheduler: ""
 41       strictARP: false
 42       syncPeriod: 0s
 43       tcpFinTimeout: 0s
 44       tcpTimeout: 0s
 45       udpTimeout: 0s
 46     kind: KubeProxyConfiguration
 47     logging:
 48       flushFrequency: 0
 49       options:
 50         json:
 51           infoBufferSize: "0"
 52       verbosity: 0
 53     metricsBindAddress: ""
 54     mode: "ipvs"          #将mode: ""修改为mode: "ipvs"
 55     nodePortAddresses: null
 56     oomScoreAdj: null
 57     portRange: ""
 58     showHiddenMetricsForVersion: ""
 59     winkernel:
 60       enableDSR: false
 61       forwardHealthCheckVip: false
 62       networkName: ""
 63       rootHnsEndpointName: ""
 64       sourceVip: ""
...

#删除所有kube-proxy让k8s进行自愈重建
kubectl delete pod -l k8s-app=kube-proxy  -n kube-system 

#验证ipvs是否可用
ipvsadm -ln

IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  10.96.0.1:443 rr
  -> 172.16.1.240:6443            Masq    1      0          0         
TCP  10.96.0.10:53 rr
  -> 10.244.235.193:53            Masq    1      0          0         
  -> 10.244.235.194:53            Masq    1      0          0         
TCP  10.96.0.10:9153 rr
  -> 10.244.235.193:9153          Masq    1      0          0         
  -> 10.244.235.194:9153          Masq    1      0          0         
TCP  10.97.29.173:443 rr
  -> 10.244.58.195:5443           Masq    1      0          0         
  -> 10.244.85.194:5443           Masq    1      0          0         
TCP  10.102.206.172:80 rr
  -> 10.244.58.199:80             Masq    1      0          0         
  -> 10.244.85.218:80             Masq    1      0          0         
  -> 10.244.85.219:80             Masq    1      0          0         
TCP  10.106.156.57:5473 rr
  -> 172.16.1.238:5473            Masq    1      0          0         
  -> 172.16.1.239:5473            Masq    1      0          0         
UDP  10.96.0.10:53 rr
  -> 10.244.235.193:53            Masq    1      0          0         
  -> 10.244.235.194:53            Masq    1      0          0   

六. 常用命令

功能说明

命令

node

查看服务器节点

kubectl get nodes

查看服务器节点详情

kubectl get nodes -o wide

节点打标签

kubectl label nodes <节点名称> labelName=<标签名称>

查看节点标签

kubectl get node --show-labels

删除节点标签

kubectl label  node <节点名称>  labelName-

pod

查看pod节点

kubectl get pod

查看所有pod节点

kubectl get pods -A

查看pod节点详情

kubectl get pod -o wide

查看所有名称空间下的pod

kubectl get pod --all-namespaces

根据yaml文件创建pod

kubectl apply -f <文件名称>

根据yaml文件删除pod

kubectl delete -f <文件名称>

删除pod节点

kubectl delete pod <pod名称> -n <名称空间>

查看异常的pod节点

kubectl get pods -n <名称空间> | grep -v Running

查看异常pod节点的详细信息及事件(用于排查异常原因)

kubectl describe pod <pod名称>  -n <名称空间>

进入默认命名空间的pod节点

kubectl exec -it <pod名称> -- /bin/bash

进入某个特定命名空间下的pod节点

kubectl exec -it <pod名称>  -n <命名空间> -- /bin/bash

普通方式创建pod

kubectl run <pod名称> --image=<镜像名称>

监控pod(一秒钟更新一次命令)

watch -n 1 kubectl get pod

deployment

deployment部署pod(具有自愈能力,宕机自动拉起)

kubectl create deployment <pod名称> --image=<镜像名称>

deployment部署pod(多副本)

kubectl create deployment <pod名称> --image=<镜像名称> --replicas=3

查看deployment部署

kubectl get deploy

删除deployment部署

kubectl delete deploy <pod名称>

deployment扩容\缩容pod

kubectl scale deploy/<pod名称> --replicas=<5>

deployment扩容\缩容pod

kubectl edit deploy <pod名称>

deployment滚动更新pod

kubectl set image deploy/<pod名称> <容器名称>=<镜像名称:版本号> --record

deployment查看pod回退版本

kubectl rollout history deploy/<pod名称>

deployment查看pod回退版本详情

kubectl rollout history deploy/<pod名称> --revision=1

deployment回退pod到上一个版本

kubectl rollout undo deploy/<pod名称>

deployment回退pod到指定版本

kubectl rollout undo deploy/<pod名称> --to-revision=1

deployment暴露pod集群内部访问(ClusterIP)

kubectl expose deploy <pod名称> --port=8080 --target-port=80 --type=ClusterIP

deployment暴露pod外网访问(NodePort)

kubectl expose deploy <pod名称> --port=8080 --target-port=80 --type=NodePort

service

查看服务

kubectl get svc

查看服务详情

kubectl get svc -o wide

查看所有名称空间下的服务

kubectl get svc --all-namespaces

namespace

查看名称空间

kubectl get namespace

查看名称空间

kubectl get ns

创建名称空间

kubectl create ns <名称>

删除名称空间

kubectl delete ns <名称>

StatefulSet

列出命名空间中的所有 StatefulSet

kubectl get statefulsets -n <namespace>

查看一个 StatefulSet详情

kubectl describe statefulset <statefulset-name> -n <namespace>