Deploying k8s 1.29.0

Basic environment preparation

System environment configuration

# 1. Set the hostname
hostnamectl set-hostname control-plane
exec bash

# 2. Configure /etc/hosts
cat <<'EOF' | tee -a /etc/hosts
192.168.1.237 control-plane
EOF

# 3. Enable IPv4 forwarding and let iptables see bridged traffic
cat <<'EOF' | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter

# 4. Verify that the br_netfilter and overlay modules are loaded:
lsmod | grep br_netfilter
lsmod | grep overlay


# 5. Set the required sysctl parameters; these persist across reboots
cat <<'EOF' | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF

# 6. Apply the sysctl parameters without rebooting
sudo sysctl --system

# Verify that net.bridge.bridge-nf-call-iptables, net.bridge.bridge-nf-call-ip6tables and net.ipv4.ip_forward are set to 1
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward

# 7. Disable swap
swapoff -a
sed -i '/swap/s/.*/#&/g' /etc/fstab


# 8. Disable SELinux
sudo setenforce 0
sudo sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config

# 9. Install chrony for time synchronization
yum install chrony -y
systemctl enable --now chronyd
chronyc sources

Install IPVS for kube-proxy

https://github.com/kubernetes/kubernetes/blob/master/pkg/proxy/ipvs/README.md#run-kube-proxy-in-ipvs-mode

cat <<'EOF' | tee /etc/sysconfig/modules/ipvs.modules
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF

chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4

yum -y install ipset ipvsadm

# Check the installed versions
[root@control-plane calico]# yum -y install ipset ipvsadm
Package ipset-7.1-1.el7.x86_64 already installed and latest version
Package ipvsadm-1.27-8.el7.x86_64 already installed and latest version
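
Note: nf_conntrack_ipv4 exists only on older kernels such as CentOS 7's 3.10; from kernel 4.19 onward it was merged into nf_conntrack, so on newer kernels the last modprobe line in the script becomes:

modprobe -- nf_conntrack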

Install kubeadm

Official repo (recommended)

The official repo always has the latest version:
https://kubernetes.io/zh-cn/docs/setup/production-environment/tools/kubeadm/install-kubeadm/


cat <<'EOF' | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://pkgs.k8s.io/core:/stable:/v1.29/rpm/
enabled=1
gpgcheck=1
gpgkey=https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key
exclude=kubelet kubeadm kubectl cri-tools kubernetes-cni
EOF

sudo yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
sudo systemctl enable --now kubelet

# kubelet will now restart every few seconds, stuck in a crash loop waiting for kubeadm to tell it what to do.

# Check the installed versions
[root@control-plane ~]# rpm -qa | grep -E "kube"
kubernetes-cni-1.3.0-150500.1.1.x86_64
kubelet-1.29.0-150500.1.1.x86_64
kubectl-1.29.0-150500.1.1.x86_64
kubeadm-1.29.0-150500.1.1.x86_64

Aliyun mirror

https://developer.aliyun.com/mirror/kubernetes?spm=a2c6h.13651102.0.0.560a1b11gYpsH1
As of 2024-01-02, the official repo is at 1.29.0 while the Aliyun mirror only has 1.28.2.


cat <<'EOF' > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

yum install -y kubelet kubeadm kubectl
systemctl enable kubelet && systemctl start kubelet

Tsinghua mirror

https://mirrors.tuna.tsinghua.edu.cn/help/kubernetes/
As of 2024-01-02, the official repo is at 1.29.0 while the Tsinghua mirror only has 1.28.2.

cat <<'EOF' | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=kubernetes
baseurl=https://mirrors.tuna.tsinghua.edu.cn/kubernetes/yum/repos/kubernetes-el7-$basearch
enabled=1
gpgcheck=0
EOF

yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes

Shell completion

yum -y install bash-completion
# If `type _init_completion` succeeds, bash-completion is installed and working
kubectl completion bash > /etc/bash_completion.d/kubectl
kubeadm completion bash > /etc/bash_completion.d/kubeadm
# Reload bash so completion takes effect in the current shell
exec bash

Configure crictl

CRI: docker (cri-dockerd)

# Reference: https://github.com/kubernetes-sigs/cri-tools/blob/master/docs/crictl.md
cat <<'EOF' | tee /etc/crictl.yaml
runtime-endpoint: unix:///var/run/cri-dockerd.sock
image-endpoint: unix:///var/run/cri-dockerd.sock
timeout: 2
debug: false
pull-image-on-create: false
EOF

CRI: containerd

# Reference: https://github.com/kubernetes-sigs/cri-tools/blob/master/docs/crictl.md
cat <<'EOF' | tee /etc/crictl.yaml
runtime-endpoint: unix:///var/run/containerd/containerd.sock
image-endpoint: unix:///var/run/containerd/containerd.sock
timeout: 2
debug: false
pull-image-on-create: false
EOF
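
Whichever runtime you configure, a quick smoke test once it is running confirms crictl can reach the socket:

# both should answer without a connection error
crictl info
crictl ps -a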

Docker-based setup

Install docker

Official repo

https://docs.docker.com/engine/install/centos/

sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
# sudo yum install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
sudo yum -y install docker-ce docker-ce-cli docker-buildx-plugin docker-compose-plugin

Aliyun mirror

https://developer.aliyun.com/mirror/docker-ce?spm=a2c6h.13651102.0.0.57e31b11KIc1Rx

# Step 1: install the required system tools
sudo yum install -y yum-utils device-mapper-persistent-data lvm2
# Step 2: add the repo
sudo yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Step 3: point the repo at the Aliyun mirror
sudo sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# Step 4: refresh the cache and install Docker CE
sudo yum makecache fast
sudo yum -y install docker-ce
# Step 5: start the Docker service
sudo service docker start

Tsinghua mirror

https://mirrors.tuna.tsinghua.edu.cn/help/docker-ce/

yum install -y yum-utils
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
sed -i 's+https://download.docker.com+https://mirrors.tuna.tsinghua.edu.cn/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# yum install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
sudo yum -y install docker-ce docker-ce-cli docker-buildx-plugin docker-compose-plugin

Configure docker

sudo mkdir -p /etc/docker
cat <<'EOF' | sudo tee /etc/docker/daemon.json
{
  "registry-mirrors": [
    "https://ywuql7hg.mirror.aliyuncs.com",
    "https://docker.mirrors.ustc.edu.cn",
    "https://reg-mirror.qiniu.com",
    "http://hub-mirror.c.163.com"
  ],
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m",
    "max-file": "3"
  }
}
EOF

Start docker

# Check the installed versions
[root@centos7 init-install]# rpm -qa | grep docker
docker-ce-rootless-extras-24.0.7-1.el7.x86_64
docker-ce-cli-24.0.7-1.el7.x86_64
docker-ce-24.0.7-1.el7.x86_64
cri-dockerd-0.3.8-3.el7.x86_64
docker-buildx-plugin-0.11.2-1.el7.x86_64
docker-compose-plugin-2.21.0-1.el7.x86_64

# Enable and start
systemctl enable --now docker
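
kubeadm 1.29 defaults the kubelet to the systemd cgroup driver, so docker must match the native.cgroupdriver=systemd set in daemon.json above; a quick check:

# should print: systemd
docker info --format '{{.CgroupDriver}}'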

Install cri-dockerd

As of v1.24, dockershim has been removed from the Kubernetes project; see the Dockershim removal FAQ for details.
Newer versions therefore need cri-dockerd.

GitHub: https://github.com/Mirantis/cri-dockerd

# Install
rpm -ivh https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.9/cri-dockerd-0.3.9-3.el7.x86_64.rpm
# Patch the service unit; otherwise the pause image pulled by default is 3.6
sed -i 's#cri-dockerd --container-runtime-endpoint#cri-dockerd --pod-infra-container-image registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9 --container-runtime-endpoint#g' /usr/lib/systemd/system/cri-docker.service
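
The unit file was edited in place, so have systemd re-read it before starting the service:

systemctl daemon-reload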

Start cri-dockerd

systemctl enable --now cri-docker

Deploy k8s

List the images required

[root@centos7 ~]# kubeadm config images list
registry.k8s.io/kube-apiserver:v1.29.0
registry.k8s.io/kube-controller-manager:v1.29.0
registry.k8s.io/kube-scheduler:v1.29.0
registry.k8s.io/kube-proxy:v1.29.0
registry.k8s.io/coredns/coredns:v1.11.1
registry.k8s.io/pause:3.9
registry.k8s.io/etcd:3.5.10-0

# Pull from Google's registry and push to your own Aliyun repository.
# NOTE: log in to the Aliyun image registry first (docker login registry.cn-shenzhen.aliyuncs.com)
while read line; do
  # take the last two /- or :-separated fields so nested paths like coredns/coredns still work
  img_name=$(echo $line | awk -F '[/:]' '{print $(NF-1)}')
  img_ver=$(echo $line | awk -F '[/:]' '{print $NF}')
  src_img=$line
  dst_img=registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/${img_name}:${img_ver}
  docker pull $src_img
  docker tag $src_img $dst_img
  echo docker push $dst_img
  docker push $dst_img
done < <(kubeadm config images list)

kubeadm init configuration

# Print the default init config and save it.
mkdir -p /data/k8s/init-install
cd /data/k8s/init-install
kubeadm config print init-defaults > kubeadm.yaml

# Edit kubeadm.yaml as follows:
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.1.237 # change to the control-plane IP
  bindPort: 6443
nodeRegistration:
  # criSocket: unix:///var/run/containerd/containerd.sock
  criSocket: unix:///var/run/cri-dockerd.sock # change to the cri-dockerd socket
  imagePullPolicy: IfNotPresent
  name: control-plane # change from "node" to control-plane
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
#imageRepository: registry.k8s.io
imageRepository: registry.cn-shenzhen.aliyuncs.com/only_wild_weeds # change to your own registry
kind: ClusterConfiguration
kubernetesVersion: 1.29.0 # kubeadm is 1.29.0, matching the installed version, so no change needed; if you do change it,
# see
# - the Kubernetes [version skew policy](https://kubernetes.io/zh-cn/releases/version-skew-policy/)
# - the kubeadm-specific [version skew policy](https://kubernetes.io/zh-cn/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/#version-skew-policy)
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16 # the pod CIDR
scheduler: {}
#### Configure the kube-proxy mode,
# see: https://github.com/kubernetes/kubernetes/blob/master/pkg/proxy/ipvs/README.md#run-kube-proxy-in-ipvs-mode
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
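
kubeadm also supports a dry run, which is handy for catching config mistakes before touching the host (optional):

# prints what kubeadm would do, without modifying the node
kubeadm init --config /data/k8s/init-install/kubeadm.yaml --dry-run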

Pull the images needed for init


# List the images init will pull.
[root@centos7 init-install]# kubeadm config images list --config=kubeadm.yaml
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-apiserver:v1.29.0
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-controller-manager:v1.29.0
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-scheduler:v1.29.0
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-proxy:v1.29.0
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/coredns:v1.11.1
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/etcd:3.5.10-0


# Pull the images for init manually.
[root@centos7 init-install]# kubeadm config images pull --image-repository=registry.cn-shenzhen.aliyuncs.com/only_wild_weeds --cri-socket=unix:///var/run/cri-dockerd.sock --kubernetes-version=1.29.0
[config/images] Pulled registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-apiserver:v1.29.0
[config/images] Pulled registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-controller-manager:v1.29.0
[config/images] Pulled registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-scheduler:v1.29.0
[config/images] Pulled registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-proxy:v1.29.0
[config/images] Pulled registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/coredns:v1.11.1
[config/images] Pulled registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9
[config/images] Pulled registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/etcd:3.5.10-0


Deploy


# Initialize with the generated config file (by default kubelet looks for /var/lib/kubelet/config.yaml)
kubeadm init --config /data/k8s/init-install/kubeadm.yaml

# Error: this turned out to be because cri-dockerd's --pod-infra-container-image flag was not set, so it defaulted to pulling registry.k8s.io/pause:3.6
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.

Unfortunately, an error has occurred:
timed out waiting for the condition

This error is likely caused by:
- The kubelet is not running
- The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)

If you are on a systemd-powered system, you can try to troubleshoot the error with the following commands:
- 'systemctl status kubelet'
- 'journalctl -xeu kubelet'

Additionally, a control plane component may have crashed or exited when started by the container runtime.
To troubleshoot, list all containers using your preferred container runtimes CLI.
Here is one example how you may list all running Kubernetes containers by using crictl:
- 'crictl --runtime-endpoint unix:///var/run/cri-dockerd.sock ps -a | grep kube | grep -v pause'
Once you have found the failing container, you can inspect its logs with:
- 'crictl --runtime-endpoint unix:///var/run/cri-dockerd.sock logs CONTAINERID'
error execution phase wait-control-plane: couldn't initialize a Kubernetes cluster
To see the stack trace of this error execute with --v=5 or higher


# kubelet logs:
Jan 03 02:17:11 control-plane kubelet[2598]: E0103 02:17:11.964435 2598 remote_runtime.go:193] "RunPodSandbox from runtime service failed" err="rpc error: code = Unknown desc = failed pulling image \"registry.k8s.io/pause:3.6\": Error response from daemon: Head \"https://asia-east1-docker.pkg.dev/v2/k8s-artifacts-prod/images/pause/manifests/3.6\": dial tcp 108.177.97.82:443: i/o timeout"

# Check the kubelet config: the pause image is clearly set to registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9, yet it still goes and pulls 3.6 — maddening
# cat /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf
# cat /var/lib/kubelet/kubeadm-flags.env
KUBELET_KUBEADM_ARGS="--container-runtime-endpoint=unix:///var/run/cri-dockerd.sock --hostname-override=control-plane --pod-infra-container-image=registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9"

# Work around it by pulling that image manually:
crictl pull registry.k8s.io/pause:3.6

# Now a different error appears; this one is because no CNI has been deployed yet (and deploying a CNI takes a while).
Jan 03 02:30:45 control-plane kubelet[3726]: E0103 02:30:45.842118 3726 kubelet.go:2892] "Container runtime network not ready" networkReady="NetworkReady=false reason:NetworkPluginNotReady message:docker: network plugin is not ready: cni config uninitialized"


# I could not find a published cri-dockerd/k8s version mapping; the issue is probably that the matching CRI had not been upgraded. Leaving it at that after several hours. See:
https://github.com/Mirantis/cri-dockerd/blob/master/README.md#to-use-with-kubernetes

Remove the control-plane taint

# By default, for security reasons, the cluster does not schedule Pods on control-plane nodes.
# If you want to schedule Pods on the control-plane node (e.g. a single-machine cluster), run:
kubectl taint nodes --all node-role.kubernetes.io/control-plane-
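
Verify the taint is gone:

kubectl describe node control-plane | grep -i taints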

Uninstall / reset

# Use whichever variant matches your CRI socket:
kubeadm reset -f --cri-socket=unix:///var/run/cri-dockerd.sock
kubeadm reset -f --cri-socket=unix:///var/run/containerd/containerd.sock

iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
ipvsadm -C
rm -rf /etc/cni/net.d $HOME/.kube/config

systemctl restart kubelet

Checks after deploying the CNI

[root@control-plane calico]# kubectl get no -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
control-plane Ready control-plane 34m v1.29.0 192.168.1.237 <none> CentOS Linux 7 (Core) 3.10.0-1160.105.1.el7.x86_64 docker://24.0.7

[root@control-plane calico]# kubectl get po --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
calico-apiserver calico-apiserver-6f9c7fb8c9-qcmhl 1/1 Running 0 12m
calico-apiserver calico-apiserver-6f9c7fb8c9-x5w49 1/1 Running 0 12m
calico-system calico-kube-controllers-67d65977d7-x54xm 1/1 Running 0 24m
calico-system calico-node-4qmwf 1/1 Running 0 22m
calico-system calico-typha-bbc495d89-gnmcg 1/1 Running 0 24m
calico-system csi-node-driver-f8p9k 2/2 Running 0 24m
kube-system coredns-76cdc8f664-4kt62 1/1 Running 0 33m
kube-system coredns-76cdc8f664-dvbdr 1/1 Running 0 33m
kube-system etcd-control-plane 1/1 Running 0 33m
kube-system kube-apiserver-control-plane 1/1 Running 0 33m
kube-system kube-controller-manager-control-plane 1/1 Running 0 33m
kube-system kube-proxy-lg2kb 1/1 Running 0 33m
kube-system kube-scheduler-control-plane 1/1 Running 0 33m
tigera-operator tigera-operator-55585899bf-qfnfg 1/1 Running 0 31m
[root@control-plane calico]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
quay.io/tigera/operator v1.32.3 7bc79e0d3be4 2 weeks ago 69.6MB
calico/typha v3.27.0 b33768e0da1f 2 weeks ago 68.3MB
calico/kube-controllers v3.27.0 4e87edec0297 2 weeks ago 75.5MB
calico/apiserver v3.27.0 848c5b919e8d 2 weeks ago 93.9MB
calico/cni v3.27.0 8e8d96a874c0 2 weeks ago 211MB
calico/node-driver-registrar v3.27.0 d36ef67f7b24 2 weeks ago 22.5MB
calico/csi v3.27.0 91c1c91da760 2 weeks ago 17.4MB
calico/pod2daemon-flexvol v3.27.0 6506d2e0be2d 2 weeks ago 15.4MB
calico/node v3.27.0 1843802b91be 2 weeks ago 340MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-controller-manager v1.29.0 0824682bcdc8 3 weeks ago 122MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-apiserver v1.29.0 1443a367b16d 3 weeks ago 127MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-scheduler v1.29.0 7ace497ddb8e 3 weeks ago 59.5MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-proxy v1.29.0 98262743b26f 3 weeks ago 82.2MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/etcd 3.5.10-0 a0eed15eed44 2 months ago 148MB
registry.aliyuncs.com/google_containers/coredns v1.11.1 cbb01a7bd410 4 months ago 59.8MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/coredns v1.11.1 cbb01a7bd410 4 months ago 59.8MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause 3.9 e6f181688397 14 months ago 744kB
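
kube-proxy was configured with mode: ipvs, so the Service virtual servers it programs can be inspected with the ipvsadm installed earlier:

# each Service ClusterIP should appear as a virtual server with its endpoints
ipvsadm -Ln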


containerd-based setup

GitHub install docs: https://github.com/containerd/containerd/blob/main/docs/getting-started.md
Version compatibility: https://github.com/containerd/containerd/blob/main/RELEASES.md#kubernetes-support

Install containerd

# Reference: https://docs.docker.com/engine/install/centos/
# Install containerd
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo

# Install the latest version
sudo yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

# Check the containerd version
rpm -qa | grep containerd
containerd.io-1.6.26-3.1.el7.x86_64

Configure config.toml

config.toml parameter reference: https://github.com/containerd/containerd/blob/main/docs/man/containerd-config.toml.5.md

# Back up the original config file
cp /etc/containerd/config.toml{,_bak}
# Regenerate the config file
containerd config default > /etc/containerd/config.toml

# The edited config file:
cat /etc/containerd/config.toml

disabled_plugins = []
imports = []
oom_score = 0
plugin_dir = ""
required_plugins = []
root = "/var/lib/containerd"
state = "/run/containerd"
temp = ""
version = 2

[cgroup]
path = ""

[debug]
address = ""
format = ""
gid = 0
level = ""
uid = 0

[grpc]
address = "/run/containerd/containerd.sock"
gid = 0
max_recv_message_size = 16777216
max_send_message_size = 16777216
tcp_address = ""
tcp_tls_ca = ""
tcp_tls_cert = ""
tcp_tls_key = ""
uid = 0

[metrics]
address = ""
grpc_histogram = false

[plugins]

[plugins."io.containerd.gc.v1.scheduler"]
deletion_threshold = 0
mutation_threshold = 100
pause_threshold = 0.02
schedule_delay = "0s"
startup_delay = "100ms"

[plugins."io.containerd.grpc.v1.cri"]
device_ownership_from_security_context = false
disable_apparmor = false
disable_cgroup = false
disable_hugetlb_controller = true
disable_proc_mount = false
disable_tcp_service = true
enable_selinux = false
enable_tls_streaming = false
enable_unprivileged_icmp = false
enable_unprivileged_ports = false
ignore_image_defined_volumes = false
max_concurrent_downloads = 3
max_container_log_line_size = 16384
netns_mounts_under_state_dir = false
restrict_oom_score_adj = false
#sandbox_image = "registry.k8s.io/pause:3.6"
sandbox_image = "registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9" # change this
selinux_category_range = 1024
stats_collect_period = 10
stream_idle_timeout = "4h0m0s"
stream_server_address = "127.0.0.1"
stream_server_port = "0"
systemd_cgroup = false
tolerate_missing_hugetlb_controller = true
unset_seccomp_profile = ""

[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/opt/cni/bin"
conf_dir = "/etc/cni/net.d"
conf_template = ""
ip_pref = ""
max_conf_num = 1

[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "runc"
disable_snapshot_annotations = true
discard_unpacked_layers = false
ignore_rdt_not_enabled_errors = false
no_pivot = false
snapshotter = "overlayfs"

[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = ""

[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = ""
CriuImagePath = ""
CriuPath = ""
CriuWorkPath = ""
IoGid = 0
IoUid = 0
NoNewKeyring = false
NoPivotRoot = false
Root = ""
ShimCgroup = ""
# SystemdCgroup = false
SystemdCgroup = true # change this

[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = ""

[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]

[plugins."io.containerd.grpc.v1.cri".image_decryption]
key_model = "node"

[plugins."io.containerd.grpc.v1.cri".registry]
config_path = ""

[plugins."io.containerd.grpc.v1.cri".registry.auths]

[plugins."io.containerd.grpc.v1.cri".registry.configs]

[plugins."io.containerd.grpc.v1.cri".registry.headers]

[plugins."io.containerd.grpc.v1.cri".registry.mirrors]

[plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
tls_cert_file = ""
tls_key_file = ""

[plugins."io.containerd.internal.v1.opt"]
path = "/opt/containerd"

[plugins."io.containerd.internal.v1.restart"]
interval = "10s"

[plugins."io.containerd.internal.v1.tracing"]
sampling_ratio = 1.0
service_name = "containerd"

[plugins."io.containerd.metadata.v1.bolt"]
content_sharing_policy = "shared"

[plugins."io.containerd.monitor.v1.cgroups"]
no_prometheus = false

[plugins."io.containerd.runtime.v1.linux"]
no_shim = false
runtime = "runc"
runtime_root = ""
shim = "containerd-shim"
shim_debug = false

[plugins."io.containerd.runtime.v2.task"]
platforms = ["linux/amd64"]
sched_core = false

[plugins."io.containerd.service.v1.diff-service"]
default = ["walking"]

[plugins."io.containerd.service.v1.tasks-service"]
rdt_config_file = ""

[plugins."io.containerd.snapshotter.v1.aufs"]
root_path = ""

[plugins."io.containerd.snapshotter.v1.btrfs"]
root_path = ""

[plugins."io.containerd.snapshotter.v1.devmapper"]
async_remove = false
base_image_size = ""
discard_blocks = false
fs_options = ""
fs_type = ""
pool_name = ""
root_path = ""

[plugins."io.containerd.snapshotter.v1.native"]
root_path = ""

[plugins."io.containerd.snapshotter.v1.overlayfs"]
mount_options = []
root_path = ""
sync_remove = false
upperdir_label = false

[plugins."io.containerd.snapshotter.v1.zfs"]
root_path = ""

[plugins."io.containerd.tracing.processor.v1.otlp"]
endpoint = ""
insecure = false
protocol = ""

[proxy_plugins]

[stream_processors]

[stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
path = "ctd-decoder"
returns = "application/vnd.oci.image.layer.v1.tar"

[stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
path = "ctd-decoder"
returns = "application/vnd.oci.image.layer.v1.tar+gzip"

[timeouts]
"io.containerd.timeout.bolt.open" = "0s"
"io.containerd.timeout.shim.cleanup" = "5s"
"io.containerd.timeout.shim.load" = "5s"
"io.containerd.timeout.shim.shutdown" = "3s"
"io.containerd.timeout.task.state" = "2s"

[ttrpc]
address = ""
gid = 0
uid = 0
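
The two edits above can also be applied non-interactively; a minimal sed sketch, assuming the stock defaults shown in this dump (substitute your own registry for the author's Aliyun path):

sed -i 's#sandbox_image = "registry.k8s.io/pause:3.6"#sandbox_image = "registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9"#' /etc/containerd/config.toml
sed -i 's#SystemdCgroup = false#SystemdCgroup = true#' /etc/containerd/config.toml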

Start containerd

systemctl enable --now containerd

Install nerdctl

ctr and crictl are only good for debugging and cannot tag images; nerdctl can.
See: https://github.com/containerd/containerd/blob/main/docs/getting-started.md#interacting-with-containerd-via-cli
GitHub: https://github.com/containerd/nerdctl

mkdir -p /data/src
cd /data/src
wget https://github.com/containerd/nerdctl/releases/download/v1.7.2/nerdctl-1.7.2-linux-amd64.tar.gz
mkdir -p nerdctl-1.7.2
tar zxvf nerdctl-1.7.2-linux-amd64.tar.gz -C nerdctl-1.7.2

cp nerdctl-1.7.2/* /usr/sbin/
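
Images pulled through the CRI live in containerd's k8s.io namespace, so pass it explicitly; a quick usage sketch (the example.com tag below is purely illustrative):

# list the images kubelet has pulled
nerdctl --namespace k8s.io images
# retag one of them, docker-style
nerdctl --namespace k8s.io tag registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause:3.9 example.com/pause:3.9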

Deploy k8s

kubeadm init configuration

# Print the default init config and save it.
mkdir -p /data/k8s/init-install
cd /data/k8s/init-install
kubeadm config print init-defaults > kubeadm.yaml

# Edited as follows:
[root@control-plane init-install]# cat kubeadm.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.1.237 # change to the control-plane IP
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock # the containerd socket
  #criSocket: unix:///var/run/cri-dockerd.sock # the cri-dockerd socket
  imagePullPolicy: IfNotPresent
  name: control-plane # change from "node" to control-plane
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
#imageRepository: registry.k8s.io
imageRepository: registry.cn-shenzhen.aliyuncs.com/only_wild_weeds # change to your own registry
kind: ClusterConfiguration
kubernetesVersion: 1.29.0 # kubeadm is 1.29.0, matching the installed version, so no change needed; if you do change it,
# see
# - the Kubernetes [version skew policy](https://kubernetes.io/zh-cn/releases/version-skew-policy/)
# - the kubeadm-specific [version skew policy](https://kubernetes.io/zh-cn/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/#version-skew-policy)
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16 # the pod CIDR
scheduler: {}
#### Configure the kube-proxy mode,
# see: https://github.com/kubernetes/kubernetes/blob/master/pkg/proxy/ipvs/README.md#run-kube-proxy-in-ipvs-mode
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
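
Recent kubeadm releases also ship a built-in validator for the config file; a quick check (assuming kubeadm >= 1.26, where the subcommand appeared):

kubeadm config validate --config /data/k8s/init-install/kubeadm.yaml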

Pull the images needed for init


kubeadm config images pull --image-repository=registry.cn-shenzhen.aliyuncs.com/only_wild_weeds --kubernetes-version=1.29.0

Deploy


# Initialize with the generated config file (by default kubelet looks for /var/lib/kubelet/config.yaml)
kubeadm init --config /data/k8s/init-install/kubeadm.yaml

# Output:

[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.126.128:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:2228e98b56d218ead25d8d54173148c312dcd911f9be77eac9e768d095f8446e


# Follow the instructions from the output
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Check the node; the CONTAINER-RUNTIME is containerd://1.6.26
[root@control-plane init-install]# kubectl get no -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
control-plane NotReady control-plane 19s v1.29.0 192.168.126.128 <none> CentOS Linux 7 (Core) 3.10.0-1160.105.1.el7.x86_64 containerd://1.6.26


# Check the pods; coredns stays Pending until a CNI is deployed
[root@control-plane flannel]# kubectl get po --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system coredns-76cdc8f664-gwh49 0/1 Pending 0 58s
kube-system coredns-76cdc8f664-sx7wl 0/1 Pending 0 58s
kube-system etcd-control-plane 1/1 Running 3 71s
kube-system kube-apiserver-control-plane 1/1 Running 3 73s
kube-system kube-controller-manager-control-plane 1/1 Running 4 72s
kube-system kube-proxy-x5r56 1/1 Running 0 58s
kube-system kube-scheduler-control-plane 1/1 Running 4 71s


Remove the control-plane taint

# By default, for security reasons, the cluster does not schedule Pods on control-plane nodes.
# If you want to schedule Pods on the control-plane node (e.g. a single-machine cluster), run:
kubectl taint nodes --all node-role.kubernetes.io/control-plane-

Uninstall / reset

# kubeadm reset -f --cri-socket=unix:///var/run/cri-dockerd.sock
# kubeadm reset -f --cri-socket=unix:///var/run/containerd/containerd.sock

iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
ipvsadm -C
rm -rf /etc/cni/net.d $HOME/.kube/config

systemctl restart kubelet

Checks after deploying the CNI

[root@control-plane calico]# kubectl get no -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
control-plane Ready control-plane 142m v1.29.0 192.168.1.237 <none> CentOS Linux 7 (Core) 3.10.0-1160.45.1.el7.x86_64 containerd://1.6.26
[root@control-plane calico]#
[root@control-plane calico]#
[root@control-plane calico]# kubectl get po --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
calico-apiserver calico-apiserver-85d58d95cd-8vr85 1/1 Running 2 140m
calico-apiserver calico-apiserver-85d58d95cd-mw8bv 1/1 Running 2 (9m59s ago) 140m
calico-system calico-kube-controllers-55c78f4978-4pg2k 1/1 Running 2 141m
calico-system calico-node-bxc2l 1/1 Running 2 (9m58s ago) 141m
calico-system calico-typha-74844b875c-qjvzw 1/1 Running 2 (9m57s ago) 141m
calico-system csi-node-driver-xchcd 2/2 Running 4 (9m57s ago) 141m
kube-system coredns-76cdc8f664-8tkck 1/1 Running 2 142m
kube-system coredns-76cdc8f664-r7pgh 1/1 Running 2 142m
kube-system etcd-control-plane 1/1 Running 6 (9m57s ago) 142m
kube-system kube-apiserver-control-plane 1/1 Running 2 (9m58s ago) 142m
kube-system kube-controller-manager-control-plane 1/1 Running 2 (9m58s ago) 142m
kube-system kube-proxy-6s7mv 1/1 Running 2 (9m59s ago) 142m
kube-system kube-scheduler-control-plane 1/1 Running 2 (9m58s ago) 142m
tigera-operator tigera-operator-55585899bf-6d7qm 1/1 Running 2 (9m59s ago) 141m
[root@control-plane calico]#

[root@control-plane calico]# crictl images
IMAGE TAG IMAGE ID SIZE
docker.io/calico/apiserver v3.27.0 848c5b919e8d3 40.3MB
docker.io/calico/cni v3.27.0 8e8d96a874c0e 94.7MB
docker.io/calico/csi v3.27.0 91c1c91da7602 8.74MB
docker.io/calico/kube-controllers v3.27.0 4e87edec0297d 33.3MB
docker.io/calico/node-driver-registrar v3.27.0 d36ef67f7b24c 11.2MB
docker.io/calico/node v3.27.0 1843802b91be8 117MB
docker.io/calico/pod2daemon-flexvol v3.27.0 6506d2e0be2d5 7.59MB
docker.io/calico/typha v3.27.0 b33768e0da1f8 29.6MB
docker.io/flannel/flannel-cni-plugin v1.2.0 a55d1bad692b7 3.88MB
docker.io/flannel/flannel v0.24.0 0dc86fe0f22e6 28MB
quay.io/tigera/operator v1.32.3 7bc79e0d3be4f 21.4MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/coredns v1.11.1 cbb01a7bd410d 18.2MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/etcd 3.5.10-0 a0eed15eed449 56.5MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-apiserver v1.29.0 1443a367b16d3 35.1MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-controller-manager v1.29.0 0824682bcdc8e 33.4MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-proxy v1.29.0 98262743b26f9 28.4MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/kube-scheduler v1.29.0 7ace497ddb8e8 18.5MB
registry.cn-shenzhen.aliyuncs.com/only_wild_weeds/pause 3.9 e6f1816883972 319kB

Install k9s

GitHub: https://github.com/derailed/k9s

rpm -ivh https://github.com/derailed/k9s/releases/download/v0.30.7/k9s_linux_amd64.rpm

Install a network plugin

https://kubernetes.io/docs/concepts/cluster-administration/addons/

Popular open-source network plugins (2024-01-03):

Plugin       Watch   Star    Fork   Notes
flannel      245     8.3k    2.9k   no NetworkPolicy support
calico       106     5.2k    1.2k
cilium       252     17.5k   2.6k   most stars, but requires kernel 4.9+
Weave Net    242     6.5k    670
kube-router  54      2.2k    462
romana       16      246     38

calico

Calico supports NetworkPolicy.
https://docs.tigera.io/calico/latest/getting-started/kubernetes/quickstart

Deploy calico

mkdir -p /data/k8s/init-install/cni/calico
cd /data/k8s/init-install/cni/calico

# Install the Tigera Calico operator and custom resource definitions.
wget https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/tigera-operator.yaml
kubectl create -f tigera-operator.yaml

# Install Calico by creating the necessary custom resources. For the configuration options available in this manifest, see the [installation reference](https://docs.tigera.io/calico/latest/reference/installation/api)
wget https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/custom-resources.yaml

# Change the CIDR in custom-resources.yaml to match podSubnet: 10.244.0.0/16 in kubeadm.yaml
sed -i 's#192.168.0.0#10.244.0.0#g' custom-resources.yaml
kubectl create -f custom-resources.yaml

# Make sure everything comes up Running; pulling the images takes quite a while (14m on my home connection)
watch kubectl get pods -n calico-system -o wide

# After uninstalling a network plugin, delete the files under /etc/cni/net.d/ to avoid conflicts
kubectl delete -f custom-resources.yaml
kubectl delete -f tigera-operator.yaml
rm -rf /etc/cni/net.d/10-calico.conflist
rm -rf /etc/cni/net.d/10-kuberouter.conflist
rm -rf /etc/cni/net.d/calico-kubeconfig


Install and configure calicoctl

https://docs.tigera.io/calico/latest/operations/calicoctl/install

# Download and install
curl -L https://github.com/projectcalico/calico/releases/download/v3.27.0/calicoctl-linux-amd64 -o /usr/sbin/calicoctl

# Make it executable
chmod +x /usr/sbin/calicoctl

# Use the Kubernetes API datastore
# https://docs.tigera.io/calico/latest/operations/calicoctl/configure/kdd
DATASTORE_TYPE=kubernetes KUBECONFIG=~/.kube/config calicoctl get nodes

# More commands: https://docs.tigera.io/calico/latest/reference/calicoctl/
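
To avoid exporting the environment variables on every invocation, the same settings can live in a config file; a minimal sketch following the kdd configuration page linked above:

mkdir -p /etc/calico
cat <<'EOF' | tee /etc/calico/calicoctl.cfg
apiVersion: projectcalico.org/v3
kind: CalicoAPIConfig
metadata:
spec:
  datastoreType: "kubernetes"
  kubeconfig: "/root/.kube/config"
EOF
calicoctl get nodes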

Flannel

Flannel does not support NetworkPolicy.
https://github.com/flannel-io/flannel#deploying-flannel-manually

mkdir -p /data/k8s/init-install/cni/flannel
cd /data/k8s/init-install/cni/flannel
wget https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml
kubectl apply -f kube-flannel.yml
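
Recent kube-flannel.yml manifests deploy flannel into its own kube-flannel namespace; once those pods are Running the node should go Ready (a quick check, assuming the current manifest layout):

kubectl -n kube-flannel get pods
kubectl get no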