Multi-Master Cluster Deployment

info
  • Minimum node requirement: 3 nodes.
  • The Master role is redundant: if one Master goes down, the cluster can still be operated and continues to run workloads.
  • Standard and Professional edition clusters use the multi-Master deployment when there are three microservice nodes.

This document describes deploying a Kubernetes cluster on CentOS 7.9 / Debian 12.

Server IP        Host             Role
192.168.10.20    Kubernetes 01    Master, Node
192.168.10.21    Kubernetes 02    Master, Node
192.168.10.22    Kubernetes 03    Master, Node

Server Requirements

  • No network policy restrictions between the cluster servers
  • Hostnames must be unique across the cluster servers
  • The primary NIC MAC address must be unique on each server (check with ip link)
  • product_uuid must be unique on each server (check with cat /sys/class/dmi/id/product_uuid)
  • Port 6443 (used by the Kubernetes API server) must not be in use (check with nc -vz 127.0.0.1 6443)
  • Disable swap (run swapoff -a and disable the swap mount in /etc/fstab; see the example below)
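
A minimal sketch of disabling swap persistently (assuming swap is mounted via /etc/fstab):

swapoff -a
# Comment out any active swap entries so swap stays off after reboot
sed -ri '/\sswap\s/ s/^([^#])/#\1/' /etc/fstab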

Configure HOSTS

Add the following hosts entries on every node in the Kubernetes cluster, pointing k8s-master at the three master nodes

cat >> /etc/hosts << EOF
192.168.10.20 k8s-master
192.168.10.21 k8s-master
192.168.10.22 k8s-master
EOF
  • Note: every node in the Kubernetes cluster must have these hosts entries, including any nodes added to the cluster later.
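
To confirm the name resolves on each node (getent typically shows only the first matching entry):

getent hosts k8s-master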

Install the CRI Container Runtime

Perform the following on every node in the Kubernetes cluster

  1. Download the docker installation package

    wget http://pdpublic.mingdao.com/private-deployment/offline/common/docker-27.3.1.tgz
  2. Install docker

    tar -zxvf docker-27.3.1.tgz
    mv -f docker/* /usr/local/bin/
  3. Create the docker and containerd configuration directories

    mkdir /etc/docker
    mkdir /etc/containerd
  4. Create docker's daemon.json file

    cat > /etc/docker/daemon.json <<\EOF
    {
    "registry-mirrors": ["https://uvlkeb6d.mirror.aliyuncs.com"],
    "data-root": "/data/docker",
    "max-concurrent-downloads": 10,
    "exec-opts": ["native.cgroupdriver=cgroupfs"],
    "storage-driver": "overlay2",
    "default-address-pools":[{"base":"172.80.0.0/16","size":24}],
    "insecure-registries": ["127.0.0.1:5000"]
    }
    EOF
  5. Create containerd's config.toml file

    cat > /etc/containerd/config.toml <<\EOF
    disabled_plugins = []
    imports = []
    oom_score = 0
    plugin_dir = ""
    required_plugins = []
    root = "/data/containerd"
    state = "/run/containerd"
    temp = ""
    version = 2

    [cgroup]
    path = ""

    [debug]
    address = ""
    format = ""
    gid = 0
    level = ""
    uid = 0

    [grpc]
    address = "/var/run/containerd/containerd.sock"
    gid = 0
    max_recv_message_size = 16777216
    max_send_message_size = 16777216
    tcp_address = ""
    tcp_tls_ca = ""
    tcp_tls_cert = ""
    tcp_tls_key = ""
    uid = 0

    [metrics]
    address = ""
    grpc_histogram = false

    [plugins]

    [plugins."io.containerd.gc.v1.scheduler"]
    deletion_threshold = 0
    mutation_threshold = 100
    pause_threshold = 0.02
    schedule_delay = "0s"
    startup_delay = "100ms"

    [plugins."io.containerd.grpc.v1.cri"]
    device_ownership_from_security_context = false
    disable_apparmor = false
    disable_cgroup = false
    disable_hugetlb_controller = true
    disable_proc_mount = false
    disable_tcp_service = true
    enable_selinux = false
    enable_tls_streaming = false
    enable_unprivileged_icmp = false
    enable_unprivileged_ports = false
    ignore_image_defined_volumes = false
    max_concurrent_downloads = 3
    max_container_log_line_size = 16384
    netns_mounts_under_state_dir = false
    restrict_oom_score_adj = false
    sandbox_image = "127.0.0.1:5000/pause:3.8"
    selinux_category_range = 1024
    stats_collect_period = 10
    stream_idle_timeout = "4h0m0s"
    stream_server_address = "127.0.0.1"
    stream_server_port = "0"
    systemd_cgroup = false
    tolerate_missing_hugetlb_controller = true
    unset_seccomp_profile = ""

    [plugins."io.containerd.grpc.v1.cri".cni]
    bin_dir = "/usr/local/kubernetes/cni/bin"
    conf_dir = "/etc/cni/net.d"
    conf_template = ""
    ip_pref = ""
    max_conf_num = 1

    [plugins."io.containerd.grpc.v1.cri".containerd]
    default_runtime_name = "runc"
    disable_snapshot_annotations = true
    discard_unpacked_layers = false
    ignore_rdt_not_enabled_errors = false
    no_pivot = false
    snapshotter = "overlayfs"

    [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
    base_runtime_spec = ""
    cni_conf_dir = ""
    cni_max_conf_num = 0
    container_annotations = []
    pod_annotations = []
    privileged_without_host_devices = false
    runtime_engine = ""
    runtime_path = ""
    runtime_root = ""
    runtime_type = ""

    [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
    base_runtime_spec = ""
    cni_conf_dir = ""
    cni_max_conf_num = 0
    container_annotations = []
    pod_annotations = []
    privileged_without_host_devices = false
    runtime_engine = ""
    runtime_path = ""
    runtime_root = ""
    runtime_type = "io.containerd.runc.v2"

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
    BinaryName = ""
    CriuImagePath = ""
    CriuPath = ""
    CriuWorkPath = ""
    IoGid = 0
    IoUid = 0
    NoNewKeyring = false
    NoPivotRoot = false
    Root = ""
    ShimCgroup = ""
    SystemdCgroup = true

    [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
    base_runtime_spec = ""
    cni_conf_dir = ""
    cni_max_conf_num = 0
    container_annotations = []
    pod_annotations = []
    privileged_without_host_devices = false
    runtime_engine = ""
    runtime_path = ""
    runtime_root = ""
    runtime_type = ""

    [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]

    [plugins."io.containerd.grpc.v1.cri".image_decryption]
    key_model = "node"

    [plugins."io.containerd.grpc.v1.cri".registry]
    config_path = ""

    [plugins."io.containerd.grpc.v1.cri".registry.auths]

    [plugins."io.containerd.grpc.v1.cri".registry.configs]

    [plugins."io.containerd.grpc.v1.cri".registry.headers]

    [plugins."io.containerd.grpc.v1.cri".registry.mirrors]

    [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
    tls_cert_file = ""
    tls_key_file = ""

    [plugins."io.containerd.internal.v1.opt"]
    path = "/opt/containerd"

    [plugins."io.containerd.internal.v1.restart"]
    interval = "10s"

    [plugins."io.containerd.internal.v1.tracing"]
    sampling_ratio = 1.0
    service_name = "containerd"

    [plugins."io.containerd.metadata.v1.bolt"]
    content_sharing_policy = "shared"

    [plugins."io.containerd.monitor.v1.cgroups"]
    no_prometheus = false

    [plugins."io.containerd.runtime.v1.linux"]
    no_shim = false
    runtime = "runc"
    runtime_root = ""
    shim = "containerd-shim"
    shim_debug = false

    [plugins."io.containerd.runtime.v2.task"]
    platforms = ["linux/amd64"]
    sched_core = false

    [plugins."io.containerd.service.v1.diff-service"]
    default = ["walking"]

    [plugins."io.containerd.service.v1.tasks-service"]
    rdt_config_file = ""

    [plugins."io.containerd.snapshotter.v1.aufs"]
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.btrfs"]
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.devmapper"]
    async_remove = false
    base_image_size = ""
    discard_blocks = false
    fs_options = ""
    fs_type = ""
    pool_name = ""
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.native"]
    root_path = ""

    [plugins."io.containerd.snapshotter.v1.overlayfs"]
    root_path = ""
    upperdir_label = false

    [plugins."io.containerd.snapshotter.v1.zfs"]
    root_path = ""

    [plugins."io.containerd.tracing.processor.v1.otlp"]
    endpoint = ""
    insecure = false
    protocol = ""

    [proxy_plugins]

    [stream_processors]

    [stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
    accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
    args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
    env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
    path = "ctd-decoder"
    returns = "application/vnd.oci.image.layer.v1.tar"

    [stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
    accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
    args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
    env = ["OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf"]
    path = "ctd-decoder"
    returns = "application/vnd.oci.image.layer.v1.tar+gzip"

    [timeouts]
    "io.containerd.timeout.bolt.open" = "0s"
    "io.containerd.timeout.shim.cleanup" = "5s"
    "io.containerd.timeout.shim.load" = "5s"
    "io.containerd.timeout.shim.shutdown" = "3s"
    "io.containerd.timeout.task.state" = "2s"

    [ttrpc]
    address = ""
    gid = 0
    uid = 0
    EOF
  6. Configure the systemd unit for docker

    cat > /etc/systemd/system/docker.service <<EOF
    [Unit]
    Description=Docker
    After=network-online.target
    Wants=network-online.target
    Requires=containerd.service
    [Service]
    Type=notify
    ExecStart=/usr/local/bin/dockerd --containerd /var/run/containerd/containerd.sock
    ExecReload=/bin/kill -s HUP \$MAINPID
    LimitNOFILE=1024000
    LimitNPROC=infinity
    LimitCORE=0
    TimeoutStartSec=0
    Delegate=yes
    KillMode=process
    Restart=on-failure
    StartLimitBurst=3
    StartLimitInterval=60s
    [Install]
    WantedBy=multi-user.target
    EOF
  7. Configure the systemd unit for containerd

    cat > /etc/systemd/system/containerd.service <<EOF
    [Unit]
    Description=containerd
    After=network-online.target
    Wants=network-online.target
    [Service]
    Type=notify
    ExecStart=/usr/local/bin/containerd --config /etc/containerd/config.toml
    LimitNOFILE=1024000
    LimitNPROC=infinity
    LimitCORE=0
    TimeoutStartSec=0
    Delegate=yes
    KillMode=process
    Restart=on-failure
    StartLimitBurst=3
    StartLimitInterval=60s
    [Install]
    WantedBy=multi-user.target
    EOF
  8. Start containerd and docker and enable them at boot

    systemctl daemon-reload && systemctl restart containerd && systemctl enable containerd
    systemctl daemon-reload && systemctl restart docker && systemctl enable docker
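
    Optionally verify that both services are active (a quick sanity check):

    systemctl is-active containerd docker
    docker info | grep -E 'Server Version|Storage Driver'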

Install the CNI Plugins

Perform the following on every node in the Kubernetes cluster

  1. Download the CNI plugin archive

    wget http://pdpublic.mingdao.com/private-deployment/offline/common/kubernetes-1.25.4/cni-plugins-linux-amd64-v1.1.1.tgz
  2. Create the CNI installation directory

    mkdir -p /usr/local/kubernetes/cni/bin
  3. Extract the CNI plugins into the installation directory

    tar -zxvf cni-plugins-linux-amd64-v1.1.1.tgz -C /usr/local/kubernetes/cni/bin

Install the Commands Required by the K8S Cluster

Install the crictl/kubeadm/kubelet/kubectl commands. Perform the following on every node in the Kubernetes cluster

  1. Create the installation directory for the commands

    mkdir -p /usr/local/kubernetes/bin
  2. Download the command binaries into the installation directory

    wget http://pdpublic.mingdao.com/private-deployment/offline/common/kubernetes-1.25.4/crictl-v1.25.0-linux-amd64.tar.gz
    tar -zxvf crictl-v1.25.0-linux-amd64.tar.gz -C /usr/local/kubernetes/bin
    curl -o /usr/local/kubernetes/bin/kubeadm http://pdpublic.mingdao.com/private-deployment/offline/common/kubernetes-1.25.4/kubeadm
    curl -o /usr/local/kubernetes/bin/kubelet http://pdpublic.mingdao.com/private-deployment/offline/common/kubernetes-1.25.4/kubelet
    curl -o /usr/local/kubernetes/bin/kubectl http://pdpublic.mingdao.com/private-deployment/offline/common/kubernetes-1.25.4/kubectl
  3. Make the command files executable

    chmod +x /usr/local/kubernetes/bin/*
    chown $(id -un):$(id -gn) /usr/local/kubernetes/bin/*
  4. Configure systemd to manage kubelet

    cat > /etc/systemd/system/kubelet.service <<\EOF
    [Unit]
    Description=kubelet: The Kubernetes Node Agent
    Documentation=https://kubernetes.io/docs/home/
    Wants=network-online.target
    After=network-online.target

    [Service]
    ExecStart=/usr/local/kubernetes/bin/kubelet
    Restart=always
    StartLimitInterval=0
    RestartSec=10

    [Install]
    WantedBy=multi-user.target
    EOF
  5. Configure the kubeadm drop-in for the kubelet service

    mkdir -p /etc/systemd/system/kubelet.service.d

    cat > /etc/systemd/system/kubelet.service.d/10-kubeadm.conf <<\EOF
    # Note: This dropin only works with kubeadm and kubelet v1.11+
    [Service]
    Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
    Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
    # This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
    EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
    # This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
    # the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
    EnvironmentFile=-/etc/default/kubelet
    ExecStart=
    ExecStart=/usr/local/kubernetes/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
    EOF
  6. Start kubelet and enable it at boot

    systemctl daemon-reload && systemctl restart kubelet && systemctl enable kubelet
    • There is no need to check the service status after this restart; the service will be brought up automatically by the later kubeadm init and kubeadm join steps
  7. Add the K8S command directory to the PATH environment variable

    export PATH=/usr/local/kubernetes/bin/:$PATH
    echo 'export PATH=/usr/local/kubernetes/bin/:$PATH' >> /etc/bashrc
  8. Configure crictl to prevent errors when pulling images later

    crictl config runtime-endpoint unix:///run/containerd/containerd.sock
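
    Optionally confirm that crictl can talk to containerd (the runtime name and version should be printed):

    crictl version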

Install Environment Dependencies

Perform the following on every node in the Kubernetes cluster

  1. Install the socat and conntrack dependencies

    # centos / redhat: install with yum
    yum install -y socat conntrack-tools

    # debian / ubuntu: install with apt
    apt install -y socat conntrack
  2. Check whether any commands are missing

    docker --version && dockerd --version && pgrep -f 'dockerd' && crictl --version && kubeadm version && kubelet --version && kubectl version --client=true && socat -V | grep 'socat version' && conntrack --version && echo ok || echo error
    • Output of ok means everything is in place; output of error means a command is missing and must be installed based on the error

Adjust Kernel Configuration

Perform the following on every node in the Kubernetes cluster

  1. Add kernel modules

    cat > /etc/modules-load.d/kubernetes.conf <<EOF
    overlay
    br_netfilter
    ip_vs
    ip_vs_rr
    ip_vs_wrr
    ip_vs_sh
    EOF
  2. Load the modules

    modprobe overlay
    modprobe br_netfilter
    modprobe ip_vs
    modprobe ip_vs_rr
    modprobe ip_vs_wrr
    modprobe ip_vs_sh
  3. Add kernel parameters

    cat >> /etc/sysctl.conf <<EOF
    net.bridge.bridge-nf-call-iptables = 1
    net.bridge.bridge-nf-call-ip6tables = 1
    net.ipv4.ip_forward = 1
    vm.max_map_count = 262144

    # MD Config
    net.nf_conntrack_max = 524288
    net.ipv4.tcp_max_tw_buckets = 5000
    net.ipv4.tcp_window_scaling = 1
    net.ipv4.tcp_rmem = 8192 87380 16777216
    net.ipv4.tcp_wmem = 8192 65536 16777216
    net.ipv4.tcp_max_syn_backlog = 32768
    net.core.netdev_max_backlog = 32768
    net.core.netdev_budget = 600
    net.core.somaxconn = 32768
    net.core.wmem_default = 8388608
    net.core.rmem_default = 8388608
    net.core.rmem_max = 16777216
    net.core.wmem_max = 16777216
    net.ipv4.tcp_timestamps = 1
    net.ipv4.tcp_synack_retries = 2
    net.ipv4.tcp_syn_retries = 2
    net.ipv4.tcp_tw_recycle = 0
    net.ipv4.tcp_tw_reuse = 1
    net.ipv4.tcp_fin_timeout = 2
    net.ipv4.tcp_mem = 8388608 12582912 16777216
    net.ipv4.ip_local_port_range = 1024 65000
    net.ipv4.tcp_max_orphans = 16384
    net.ipv4.tcp_keepalive_intvl = 10
    net.ipv4.tcp_keepalive_probes = 3
    net.ipv4.tcp_keepalive_time = 600
    net.netfilter.nf_conntrack_tcp_be_liberal = 0
    net.netfilter.nf_conntrack_tcp_max_retrans = 3
    net.netfilter.nf_conntrack_tcp_timeout_max_retrans = 300
    net.netfilter.nf_conntrack_tcp_timeout_established = 86400
    fs.inotify.max_user_watches=10485760
    fs.inotify.max_user_instances=10240
    EOF

    sysctl --system
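
    Optionally confirm the modules are loaded and the key parameters took effect:

    lsmod | grep -E 'overlay|br_netfilter|ip_vs'
    sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward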

Prepare the K8S Environment Images

Perform the following on every node in the Kubernetes cluster

  1. Load the offline images

    wget http://pdpublic.mingdao.com/private-deployment/offline/common/kubernetes-1.25.4/kubeadm-1.25.4-images.tar.gz
    docker load -i kubeadm-1.25.4-images.tar.gz
  2. Start a local registry, then tag and push the images

    docker run -d -p 5000:5000 --restart always --name registry registry:2
    for i in $(docker images | grep 'registry.k8s.io\|rancher' | awk 'NR!=0{print $1":"$2}');do docker tag $i $(echo $i | sed -e "s/registry.k8s.io/127.0.0.1:5000/" -e "s#coredns/##" -e "s/rancher/127.0.0.1:5000/");done
    for i in $(docker images | grep :5000 | awk 'NR!=0{print $1":"$2}');do docker push $i;done
    docker images | grep :5000
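
    Optionally confirm the images landed in the local registry (queries the registry v2 API):

    curl -s http://127.0.0.1:5000/v2/_catalog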

Initialize the First Master Node

Perform the following only on the Kubernetes 01 node

  1. Initialize the master node

    kubeadm init --control-plane-endpoint "k8s-master:6443" --upload-certs --cri-socket unix:///var/run/containerd/containerd.sock -v 5 --kubernetes-version=1.25.4 --image-repository=127.0.0.1:5000 --pod-network-cidr=10.244.0.0/16

    The tail of the output looks similar to:

    ...
    You can now join any number of control-plane node by running the following command on each as a root:
    kubeadm join k8s-master:6443 --token 9vr73a.a8uxyaju799qwdjv --discovery-token-ca-cert-hash sha256:7c2e69131a36ae2a042a339b33381c6d0d43887e2de83720eff5359e26aec866 --control-plane --certificate-key f8902e114ef118304e561c3ecd4d0b543adc226b7a07f675f56564185ffe0c07

    Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
    As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use kubeadm init phase upload-certs to reload certs afterward.

    Then you can join any number of worker nodes by running the following on each as root:
    kubeadm join k8s-master:6443 --token 9vr73a.a8uxyaju799qwdjv --discovery-token-ca-cert-hash sha256:7c2e69131a36ae2a042a339b33381c6d0d43887e2de83720eff5359e26aec866
    • Copy this output to a text file. You will need it later to join the master and worker nodes to the cluster.
  2. Modify the usable nodePort port range

    sed -i '/- kube-apiserver/a\ \ \ \ - --service-node-port-range=1024-32767' /etc/kubernetes/manifests/kube-apiserver.yaml
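
    Optionally confirm the flag was added; kubelet recreates the kube-apiserver static Pod automatically after the manifest changes:

    grep 'service-node-port-range' /etc/kubernetes/manifests/kube-apiserver.yaml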
  3. Set the kubeconfig path

    export KUBECONFIG=/etc/kubernetes/admin.conf
    echo 'export KUBECONFIG=/etc/kubernetes/admin.conf' >> /etc/bashrc
  4. Raise the Pod limit for the current node

    echo "maxPods: 300" >> /var/lib/kubelet/config.yaml
    systemctl restart kubelet
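
    Optionally confirm the new limit was picked up (assumes the node name is the lowercase hostname, kubeadm's default):

    kubectl get node $(hostname | tr '[:upper:]' '[:lower:]') -o jsonpath='{.status.allocatable.pods}{"\n"}'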
  5. Allow the master to schedule workloads

    • Wait roughly 1-2 minutes after initializing the master node before running the command below

    • Before running it, check the kubelet service status with systemctl status kubelet and confirm it is running

    kubectl taint node $(kubectl get node | grep control-plane | awk '{print $1}') node-role.kubernetes.io/control-plane:NoSchedule-
    • The expected output of this command is "xxxx untainted"; if the output differs, wait a little longer and run it again to confirm
  6. Install the network plugin

    cat > /usr/local/kubernetes/kube-flannel.yml <<EOF
    ---
    kind: Namespace
    apiVersion: v1
    metadata:
      name: kube-flannel
      labels:
        pod-security.kubernetes.io/enforce: privileged
    ---
    kind: ClusterRole
    apiVersion: rbac.authorization.k8s.io/v1
    metadata:
      name: flannel
    rules:
    - apiGroups:
      - ""
      resources:
      - pods
      verbs:
      - get
    - apiGroups:
      - ""
      resources:
      - nodes
      verbs:
      - list
      - watch
    - apiGroups:
      - ""
      resources:
      - nodes/status
      verbs:
      - patch
    ---
    kind: ClusterRoleBinding
    apiVersion: rbac.authorization.k8s.io/v1
    metadata:
      name: flannel
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: flannel
    subjects:
    - kind: ServiceAccount
      name: flannel
      namespace: kube-system
    ---
    apiVersion: v1
    kind: ServiceAccount
    metadata:
      name: flannel
      namespace: kube-system
    ---
    kind: ConfigMap
    apiVersion: v1
    metadata:
      name: kube-flannel-cfg
      namespace: kube-system
      labels:
        tier: node
        app: flannel
    data:
      cni-conf.json: |
        {
          "name": "cbr0",
          "cniVersion": "0.3.1",
          "plugins": [
            {
              "type": "flannel",
              "delegate": {
                "hairpinMode": true,
                "isDefaultGateway": true
              }
            },
            {
              "type": "portmap",
              "capabilities": {
                "portMappings": true
              }
            }
          ]
        }
      net-conf.json: |
        {
          "Network": "10.244.0.0/16",
          "Backend": {
            "Type": "vxlan"
          }
        }
    ---
    apiVersion: apps/v1
    kind: DaemonSet
    metadata:
      name: kube-flannel-ds
      namespace: kube-system
      labels:
        tier: node
        app: flannel
    spec:
      selector:
        matchLabels:
          app: flannel
      template:
        metadata:
          labels:
            tier: node
            app: flannel
        spec:
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                - matchExpressions:
                  - key: kubernetes.io/os
                    operator: In
                    values:
                    - linux
          hostNetwork: true
          priorityClassName: system-node-critical
          tolerations:
          - operator: Exists
            effect: NoSchedule
          serviceAccountName: flannel
          initContainers:
          - name: install-cni-plugin
            #image: flannelcni/flannel-cni-plugin:v1.1.0 for ppc64le and mips64le (dockerhub limitations may apply)
            image: 127.0.0.1:5000/mirrored-flannelcni-flannel-cni-plugin:v1.1.0
            command:
            - cp
            args:
            - -f
            - /flannel
            - /opt/cni/bin/flannel
            volumeMounts:
            - name: cni-plugin
              mountPath: /opt/cni/bin
          - name: install-cni
            #image: flannelcni/flannel:v0.20.1 for ppc64le and mips64le (dockerhub limitations may apply)
            image: 127.0.0.1:5000/mirrored-flannelcni-flannel:v0.20.1
            command:
            - cp
            args:
            - -f
            - /etc/kube-flannel/cni-conf.json
            - /etc/cni/net.d/10-flannel.conflist
            volumeMounts:
            - name: cni
              mountPath: /etc/cni/net.d
            - name: flannel-cfg
              mountPath: /etc/kube-flannel/
          containers:
          - name: kube-flannel
            #image: flannelcni/flannel:v0.20.1 for ppc64le and mips64le (dockerhub limitations may apply)
            image: 127.0.0.1:5000/mirrored-flannelcni-flannel:v0.20.1
            command:
            - /opt/bin/flanneld
            args:
            - --ip-masq
            - --kube-subnet-mgr
            resources:
              requests:
                cpu: "100m"
                memory: "50Mi"
              limits:
                cpu: "100m"
                memory: "50Mi"
            securityContext:
              privileged: false
              capabilities:
                add: ["NET_ADMIN", "NET_RAW"]
            env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: EVENT_QUEUE_DEPTH
              value: "5000"
            volumeMounts:
            - name: run
              mountPath: /run/flannel
            - name: flannel-cfg
              mountPath: /etc/kube-flannel/
            - name: xtables-lock
              mountPath: /run/xtables.lock
          volumes:
          - name: run
            hostPath:
              path: /run/flannel
          - name: cni-plugin
            hostPath:
              path: /usr/local/kubernetes/cni/bin
          - name: cni
            hostPath:
              path: /etc/cni/net.d
          - name: flannel-cfg
            configMap:
              name: kube-flannel-cfg
          - name: xtables-lock
            hostPath:
              path: /run/xtables.lock
              type: FileOrCreate
    EOF

    kubectl apply -f /usr/local/kubernetes/kube-flannel.yml
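
    After a short wait, confirm the flannel DaemonSet Pods are running (a quick check):

    kubectl get pod -n kube-system -l app=flannel -o wide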

Join the Other Master Nodes to the Cluster

Perform the following on the Kubernetes 02/03 nodes

  1. Join the Kubernetes cluster

    kubeadm join k8s-master:6443 --token 9vr73a.a8uxyaju799qwdjv --discovery-token-ca-cert-hash sha256:7c2e69131a36ae2a042a339b33381c6d0d43887e2de83720eff5359e26aec866 --control-plane --certificate-key f8902e114ef118304e561c3ecd4d0b543adc226b7a07f675f56564185ffe0c07
    • This command comes from the output of the successful kubeadm init on the first master node; the one shown here is an example and differs for every cluster

    • If you have lost it, regenerate it on the first master node as follows:

      1. Regenerate the join command
        kubeadm token create --print-join-command
      2. Re-upload the certificates and generate a new decryption key
        kubeadm init phase upload-certs --upload-certs
      3. Assemble the join command: append the --control-plane --certificate-key parameters and use the newly generated key as the value of --certificate-key
        kubeadm join k8s-master:6443 --token 1b6i9d.0qqufwsjrjpuhkwo --discovery-token-ca-cert-hash sha256:3d28faa49e9cac7dd96aded0bef33a6af1ced57e45f0b12c6190f3d4e1055456 --control-plane --certificate-key 57a0f0e9be1d9f1c74bab54a52faa143ee9fd9c26a60f1b3b816b17b93ecaf6f
        • You now have the join command for adding master nodes to the cluster
  2. Modify the usable nodePort port range

    sed -i '/- kube-apiserver/a\ \ \ \ - --service-node-port-range=1024-32767' /etc/kubernetes/manifests/kube-apiserver.yaml
  3. Set the kubeconfig path

    export KUBECONFIG=/etc/kubernetes/admin.conf
    echo 'export KUBECONFIG=/etc/kubernetes/admin.conf' >> /etc/bashrc
  4. Raise the Pod limit for the current node

    echo "maxPods: 300" >> /var/lib/kubelet/config.yaml
    systemctl restart kubelet
  5. Allow the master to schedule workloads

    • Wait roughly 1-2 minutes after this node has been initialized before running the command below

    • Before running it, check the kubelet service status with systemctl status kubelet and confirm it is running

    kubectl taint node $(kubectl get node | grep control-plane | awk '{print $1}') node-role.kubernetes.io/control-plane:NoSchedule-
    • The expected output of this command is "xxxx untainted"; if the output differs, wait a little longer and run it again to confirm

Join Additional Worker Nodes to the Cluster

For example, a flink node or any microservice nodes added later join the current multi-master Kubernetes cluster as worker nodes

  1. Join the Kubernetes cluster

    kubeadm join 192.168.10.20:6443 --token 3nwjzw.pdod3r27lnqqhi0x \
    --discovery-token-ca-cert-hash sha256:a84445303a0f8249e7eae3059cb99d46038dc275b2dc2043a022de187a1175a2
    • This command comes from the output of the successful kubeadm init on the first master node; the one shown here is an example and differs for every cluster
    • If you have lost it, run kubeadm token create --print-join-command on a master node to regenerate it
  2. Raise the Pod limit for the current node

    echo "maxPods: 300" >> /var/lib/kubelet/config.yaml
    systemctl restart kubelet

Cluster Status Check

  1. Check node status

    kubectl get pod -n kube-system    # the READY column should show "1/1"
    kubectl get node                  # the STATUS column should show "Ready"
  2. Download the image (required on every microservice node)

    Download the centos:7.9.2009 image in advance and upload it to each server

    Offline image download link: http://pdpublic.mingdao.com/private-deployment/offline/common/centos7.9.2009.tar.gz

    Load the offline image on each server:

    gunzip -d centos7.9.2009.tar.gz
    ctr -n k8s.io image import centos7.9.2009.tar
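
    Optionally confirm the image is now visible to the container runtime:

    crictl images | grep centos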
  3. Only on microservice node 01, write the configuration and start the test containers

    cat > /usr/local/kubernetes/test.yaml <<\EOF
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: test
      namespace: default
    spec:
      replicas: 3
      selector:
        matchLabels:
          app: test
      template:
        metadata:
          labels:
            app: test
          annotations:
            md-update: '20200517104741'
        spec:
          containers:
          - name: test
            image: centos:7.9.2009
            command:
            - sh
            - -c
            - |
              echo $(hostname) > hostname.txt
              python -m SimpleHTTPServer
            resources:
              limits:
                memory: 512Mi
                cpu: 1
              requests:
                memory: 64Mi
                cpu: 0.01
            volumeMounts:
            - name: tz-config
              mountPath: /etc/localtime
          volumes:
          - name: tz-config
            hostPath:
              path: /usr/share/zoneinfo/Etc/GMT-8

    ---

    apiVersion: v1
    kind: Service
    metadata:
      name: test
      namespace: default
    spec:
      selector:
        app: test
      ports:
      - name: external-test
        port: 8000
        targetPort: 8000
        nodePort: 8000
      type: NodePort
    EOF

    kubectl apply -f /usr/local/kubernetes/test.yaml
  4. Check Pod status

    kubectl get pod -o wide
  5. Test access

    curl 127.0.0.1:8000/hostname.txt
    • Repeated curl requests should normally return the hostnames of different pods
  6. If a curl request that lands on a container on another node takes about 1 second to return, disable hardware offloading on the flannel.1 network interface (this must be configured on every node in the Kubernetes cluster)

    cat > /etc/systemd/system/disable-offload.service <<\EOF
    [Unit]
    Description=Disable offload for flannel.1
    After=network-online.target flanneld.service

    [Service]
    Type=oneshot
    ExecStartPre=/bin/bash -c 'while [ ! -d /sys/class/net/flannel.1 ]; do sleep 1; done'
    ExecStart=/sbin/ethtool --offload flannel.1 rx off tx off

    [Install]
    WantedBy=multi-user.target
    EOF

    Reload the systemd configuration and start the service

    systemctl daemon-reload
    systemctl enable disable-offload
    systemctl start disable-offload
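
    Optionally confirm offloading is now disabled on the interface:

    ethtool --show-offload flannel.1 | grep -E 'rx-checksumming|tx-checksumming'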

Configure Automatic Certificate Renewal

The certificates kubeadm generates for cluster components are valid for one year by default (the CA itself is valid for ten years). Renew them before they expire to avoid failures caused by expired certificates.

You can use the kubeadm certs check-expiration subcommand to see when the certificates expire. Before they do, run kubeadm certs renew all on every Master node to renew all certificates; after running this command you must restart the control-plane Pods for the renewal to take effect.
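
A minimal manual-renewal sketch: check expiry, renew, then restart the control-plane static Pods by briefly moving their manifests out of the manifests directory (kubelet recreates them automatically; the pause gives kubelet time to notice the removal):

kubeadm certs check-expiration
kubeadm certs renew all
# Restart the control-plane static Pods so they pick up the renewed certificates
mkdir -p /tmp/k8s-manifests-backup
mv /etc/kubernetes/manifests/*.yaml /tmp/k8s-manifests-backup/
sleep 20
mv /tmp/k8s-manifests-backup/*.yaml /etc/kubernetes/manifests/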


Alternatively, on each Kubernetes Master node create the script below and add it to a scheduled task so the certificates are checked and renewed automatically.

The steps are as follows:

Create a directory for the script on each Kubernetes Master node

mkdir /usr/local/kubernetes/script

Run crontab -e and add the following scheduled task to the configuration

# Check and update kubernetes certificates regularly
0 1 * * * /bin/bash /usr/local/kubernetes/script/check_k8s_certs.sh
  • Stagger the execution time on each node: for example, 1 AM on the first node, then 2 AM and 3 AM on the second and third nodes respectively.

Edit the script file with vim /usr/local/kubernetes/script/check_k8s_certs.sh and paste in the script content below

#!/bin/bash

export KUBECONFIG=/etc/kubernetes/admin.conf
export PATH=/usr/local/kubernetes/bin/:$PATH

# Get the current hostname (normalized to lowercase)
current_hostname=$(hostname | tr '[:upper:]' '[:lower:]')

# Kubernetes certificate directory
cert_dir="/etc/kubernetes/pki"

# Certificate expiry threshold in days
alert_days=7

# Logging functions
check_k8s_certs_log_file="/var/log/check_k8s_certs.log"
log_info() {
    echo "$(date +"%Y-%m-%d %H:%M:%S") INFO: $1" >> "$check_k8s_certs_log_file"
}

log_error() {
    echo "$(date +"%Y-%m-%d %H:%M:%S") ERROR: $1" >> "$check_k8s_certs_log_file"
}

# Current date
current_date=$(date +%s)

# Check certificate expiry dates and renew when needed
check_and_update_certs() {
    certs_to_update=()
    for cert in $(find $cert_dir -name "*.crt"); do

        # Get the certificate's expiry date
        expiry_date=$(openssl x509 -enddate -noout -in $cert | cut -d= -f2)
        expiry_date_seconds=$(date -d "$expiry_date" +%s)

        # Calculate the remaining days
        days_left=$((($expiry_date_seconds - $current_date) / 86400))

        # If the certificate is within the expiry threshold, add it to the renewal list
        if [ $days_left -le $alert_days ]; then
            certs_to_update+=("$cert")
        fi
    done

    if [ ${#certs_to_update[@]} -eq 0 ]; then
        log_info "Check completed, no certificates need to be updated."
    else
        for cert in "${certs_to_update[@]}"; do
            log_info "$cert certificate will expire within $alert_days days and needs to be updated."
        done

        # Renew the certificates and restart the related components
        log_info "Start updating certificates"
        if kubeadm certs renew all >> "$check_k8s_certs_log_file"; then
            log_info "Certificate update successful."
            sleep 60s
            log_info "Start restarting control plane components"

            # Restart the control-plane components
            for pod_name in $(kubectl get pod -n kube-system | grep $current_hostname | grep "kube-apiserver\|kube-controller-manager\|kube-scheduler\|etcd" | awk '{print $1}'); do
                if kubectl -n kube-system delete pod $pod_name; then
                    log_info "Restart $pod_name successful."
                    sleep 60s
                else
                    log_error "Restart $pod_name failed."
                fi
            done
        else
            log_error "Certificate update failed."
        fi
    fi
}

main() {
    log_info "Start checking the expiration time of Kubernetes certificates"
    check_and_update_certs
}

main