Merge pull request #228 from k3s-io/default_k3s_script

Fix HA, simplify provisioning, add Vagrant test cluster
This commit is contained in:
Derek Nola
2023-11-08 15:40:11 -08:00
committed by GitHub
16 changed files with 237 additions and 133 deletions

5
.ansible-lint Normal file
View File

@@ -0,0 +1,5 @@
---
warn_list:
- var-naming[no-role-prefix]
- yaml[comments-indentation]
- yaml[line-length]

View File

@@ -22,7 +22,7 @@ on processor architecture:
## System requirements ## System requirements
Deployment environment must have Ansible 2.4.0+ Deployment environment must have Ansible 2.4.0+
Master and nodes must have passwordless SSH access Server and agent nodes must have passwordless SSH access
## Usage ## Usage
@@ -48,7 +48,7 @@ k3s_cluster:
If needed, you can also edit `vars` section at the bottom to match your environment. If needed, you can also edit `vars` section at the bottom to match your environment.
If multiple hosts are in the server group the playbook will automatically setup k3s in HA mode with embedded etcd. If multiple hosts are in the server group the playbook will automatically setup k3s in HA mode with embedded etcd.
An odd number of server nodes is recommended (3,5,7). Read the official documentation below for more information and options. An odd number of server nodes is required (3,5,7). Read the official documentation below for more information and options.
https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/ https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/
Using a loadbalancer or VIP as the API endpoint is preferred but not covered here. Using a loadbalancer or VIP as the API endpoint is preferred but not covered here.
@@ -61,8 +61,19 @@ ansible-playbook playbook/site.yml -i inventory.yml
## Kubeconfig ## Kubeconfig
To confirm access to your **Kubernetes** cluster use the following: After successful bringup, the kubeconfig of the cluster is copied to the control-node and set as default (`~/.kube/config`).
Assuming you have [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) installed, confirm access to your **Kubernetes** cluster with the following:
```bash ```bash
kubectl get nodes kubectl get nodes
``` ```
## Local Testing
A Vagrantfile is provided that provisions a 5-node cluster using LibVirt or VirtualBox and Vagrant. To use it:
```bash
vagrant up
```
By default, each node is given 2 cores and 2GB of RAM and runs Ubuntu 20.04. You can customize these settings by editing the `Vagrantfile`.

56
Vagrantfile vendored Normal file
View File

@@ -0,0 +1,56 @@
# ENV['VAGRANT_NO_PARALLEL'] = 'no'

# Name of every VM in the test cluster. The position of a name in this list
# is the node number used to pick its box and its static IP.
NODE_ROLES = %w[server-0 server-1 server-2 agent-0 agent-1]
# Base box for each node, matched to NODE_ROLES by index.
NODE_BOXES = %w[
  generic/ubuntu2004
  generic/ubuntu2004
  generic/ubuntu2004
  generic/ubuntu2004
  generic/ubuntu2004
]
# CPU count and memory size applied to every VM by the provider blocks.
NODE_CPUS = 2
NODE_MEMORY = 2048
# Virtualbox >= 6.1.28 requires `/etc/vbox/network.conf` for expanded private networks
NETWORK_PREFIX = '10.10.10'
# Configures a single cluster VM and attaches the Ansible provisioner to it.
#
# vm       - the VM configuration object of the machine being defined
# role     - machine name; also becomes its hostname (e.g. "server-0")
# node_num - position of the machine in NODE_ROLES; selects the box and
#            the last octet of the static IP (100 + node_num)
def provision(vm, role, node_num)
  vm.box = NODE_BOXES[node_num]
  vm.hostname = role

  # The default IPs are assigned dynamically during provisioning, so the
  # server-0 address would be unknown when later servers and agents are set
  # up. A private network lets every node have a static IP and therefore
  # gives the API endpoint a known address.
  node_ip = [NETWORK_PREFIX, 100 + node_num].join('.')
  # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32
  vm.network "private_network", ip: node_ip, netmask: "255.255.255.0"

  # Required to use the private network configured above
  private_net_args = "--node-external-ip #{node_ip} --flannel-iface eth1"

  vm.provision "ansible", run: 'once' do |ansible|
    ansible.compatibility_mode = "2.0"
    ansible.playbook = "playbook/site.yml"

    # Inventory groups are derived from the machine names.
    server_hosts = NODE_ROLES.grep(/^server/)
    agent_hosts = NODE_ROLES.grep(/^agent/)
    ansible.groups = {
      "server" => server_hosts,
      "agent" => agent_hosts,
      "k3s_cluster:children" => ["server", "agent"],
    }

    # The API endpoint is the static address of node 0 (the first server).
    ansible.extra_vars = {
      k3s_version: "v1.26.5+k3s1",
      api_endpoint: "#{NETWORK_PREFIX}.100",
      token: "myvagrant",
      extra_server_args: private_net_args,
      extra_agent_args: private_net_args,
    }
  end
end
Vagrant.configure("2") do |config|
  # Default provider is libvirt, virtualbox is only provided as a backup.
  # Both providers receive the same CPU and memory sizing; libvirt is
  # declared first, exactly as before.
  %w[libvirt virtualbox].each do |provider_name|
    config.vm.provider provider_name do |v|
      v.cpus = NODE_CPUS
      v.memory = NODE_MEMORY
    end
  end

  # Define one machine per entry in NODE_ROLES; the index is passed along as
  # the node number used by `provision`.
  NODE_ROLES.each_with_index do |name, i|
    config.vm.define(name) { |node| provision(node.vm, name, i) }
  end
end

View File

@@ -9,14 +9,17 @@ k3s_cluster:
192.16.35.12 192.16.35.12
192.16.35.13 192.16.35.13
# Required Vars
vars: vars:
ansible_port: 22 ansible_port: 22
ansible_user: debian ansible_user: debian
k3s_version: v1.25.5+k3s2 k3s_version: v1.25.5+k3s2
k3s_server_location: /var/lib/rancher/k3s token: "mytoken" # Use ansible vault if you want to keep it secret
systemd_dir: /etc/systemd/system
api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}" api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}"
api_port: 6443
extra_server_args: "" extra_server_args: ""
extra_server_init_args: ""
extra_agent_args: "" extra_agent_args: ""
# Optional vars
# api_port: 6443
# k3s_server_location: /var/lib/rancher/k3s
# systemd_dir: /etc/systemd/system

View File

@@ -1,38 +1,17 @@
--- ---
- name: Download k3s binary x64 - name: Download k3s install script
ansible.builtin.get_url: ansible.builtin.get_url:
url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s url: https://get.k3s.io/
checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-amd64.txt
timeout: 120 timeout: 120
dest: /usr/local/bin/k3s dest: /usr/local/bin/k3s-install.sh
owner: root owner: root
group: root group: root
mode: 0755 mode: 0755
when: ansible_facts.architecture == "x86_64"
- name: Download k3s binary arm64 - name: Download k3s binary
ansible.builtin.get_url: ansible.builtin.command:
url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-arm64 cmd: /usr/local/bin/k3s-install.sh
checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm64.txt environment:
timeout: 120 INSTALL_K3S_SKIP_START: "true"
dest: /usr/local/bin/k3s INSTALL_K3S_VERSION: "{{ k3s_version }}"
owner: root changed_when: true
group: root
mode: 0755
when:
- ( ansible_facts.architecture is search("arm") and
ansible_facts.userspace_bits == "64" ) or
ansible_facts.architecture is search("aarch64")
- name: Download k3s binary armhf
ansible.builtin.get_url:
url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-armhf
checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm.txt
timeout: 120
dest: /usr/local/bin/k3s
owner: root
group: root
mode: 0755
when:
- ansible_facts.architecture is search("arm")
- ansible_facts.userspace_bits == "32"

View File

@@ -0,0 +1,4 @@
---
k3s_server_location: "/var/lib/rancher/k3s"
systemd_dir: "/etc/systemd/system"
api_port: 6443

View File

@@ -1,4 +1,5 @@
--- ---
- name: Copy K3s service file - name: Copy K3s service file
ansible.builtin.template: ansible.builtin.template:
src: "k3s-agent.service.j2" src: "k3s-agent.service.j2"
@@ -11,5 +12,5 @@
ansible.builtin.systemd: ansible.builtin.systemd:
name: k3s-agent name: k3s-agent
daemon_reload: true daemon_reload: true
state: restarted state: started
enabled: true enabled: true

View File

@@ -1,13 +1,17 @@
[Unit] [Unit]
Description=Lightweight Kubernetes Description=Lightweight Kubernetes
Documentation=https://k3s.io Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target After=network-online.target
[Install]
WantedBy=multi-user.target
[Service] [Service]
Type=notify Type=notify
ExecStartPre=-/sbin/modprobe br_netfilter EnvironmentFile=-/etc/default/%N
ExecStartPre=-/sbin/modprobe overlay EnvironmentFile=-/etc/sysconfig/%N
ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args }} EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process KillMode=process
Delegate=yes Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead # Having non-zero Limit*s causes performance problems due to accounting overhead
@@ -19,6 +23,7 @@ TasksMax=infinity
TimeoutStartSec=0 TimeoutStartSec=0
Restart=always Restart=always
RestartSec=5s RestartSec=5s
ExecStartPre=/bin/sh -xc '! /usr/bin/systemctl is-enabled --quiet nm-cloud-setup.service'
[Install] ExecStartPre=-/sbin/modprobe br_netfilter
WantedBy=multi-user.target ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_agent_args }}

View File

@@ -0,0 +1,4 @@
---
k3s_server_location: "/var/lib/rancher/k3s"
systemd_dir: "/etc/systemd/system"
api_port: 6443

View File

@@ -2,50 +2,30 @@
- name: Init first server node - name: Init first server node
when: ansible_hostname == groups['server'][0] when: ansible_hostname == groups['server'][0]
block: block:
- name: Start temporary service for HA cluster - name: Copy K3s service file [Single]
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--cluster-init --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args}}
# noqa: jinja[spacing]
creates: "{{ k3s_server_location }}/server/node-token"
when: groups['server'] | length > 1
- name: Start temporary service for single server cluster
ansible.builtin.command:
cmd: >
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
--tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }}
creates: "{{ k3s_server_location }}/server/node-token"
when: groups['server'] | length == 1 when: groups['server'] | length == 1
ansible.builtin.template:
src: "k3s-single.service.j2"
dest: "{{ systemd_dir }}/k3s.service"
owner: root
group: root
mode: 0644
- name: Wait for node-token - name: Copy K3s service file [HA]
ansible.builtin.wait_for: when: groups['server'] | length > 1
path: "{{ k3s_server_location }}/server/node-token" ansible.builtin.template:
src: "k3s-cluster-init.service.j2"
dest: "{{ systemd_dir }}/k3s.service"
owner: root
group: root
mode: 0644
- name: Register node-token file access mode - name: Enable and check K3s service
ansible.builtin.stat: ansible.builtin.systemd:
path: "{{ k3s_server_location }}/server/node-token" name: k3s
register: p daemon_reload: true
state: started
- name: Change file access node-token enabled: true
ansible.builtin.file:
path: "{{ k3s_server_location }}/server/node-token"
mode: "g+rx,o+rx"
- name: Read node-token from server
ansible.builtin.slurp:
path: "{{ k3s_server_location }}/server/node-token"
register: node_token
- name: Store server node-token
ansible.builtin.set_fact:
token: "{{ node_token.content | b64decode | regex_replace('\n', '') }}"
- name: Restore node-token file access
ansible.builtin.file:
path: "{{ k3s_server_location }}/server/node-token"
mode: "{{ p.stat.mode }}"
- name: Create directory .kube - name: Create directory .kube
ansible.builtin.file: ansible.builtin.file:
@@ -54,6 +34,10 @@
owner: "{{ ansible_user }}" owner: "{{ ansible_user }}"
mode: "u=rwx,g=rx,o=" mode: "u=rwx,g=rx,o="
- name: Pause to allow server startup
ansible.builtin.pause:
seconds: 10
- name: Copy config file to user home directory - name: Copy config file to user home directory
ansible.builtin.copy: ansible.builtin.copy:
src: /etc/rancher/k3s/k3s.yaml src: /etc/rancher/k3s/k3s.yaml
@@ -76,17 +60,28 @@
flat: true flat: true
- name: Start other server if any and verify status - name: Start other server if any and verify status
when:
- (groups['server'] | length) > 1
- ansible_hostname != groups['server'][0]
block: block:
- name: Init additonal server nodes - name: Copy K3s service file [HA]
ansible.builtin.command: when: groups['server'] | length > 1
cmd: > ansible.builtin.template:
systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server src: "k3s-ha.service.j2"
--token "{{ hostvars[groups['server'][0]]['token'] }}" --server https://{{ api_endpoint }}:{{ api_port }} dest: "{{ systemd_dir }}/k3s.service"
--tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }} owner: root
creates: "{{ k3s_server_location }}/server/node-token" group: root
when: ansible_hostname != groups['server'][0] mode: 0644
- name: Enable and check K3s service
ansible.builtin.systemd:
name: k3s
daemon_reload: true
state: started
enabled: true
- name: Verify that all server nodes joined - name: Verify that all server nodes joined
when: (groups['server'] | length) > 1
ansible.builtin.command: ansible.builtin.command:
cmd: > cmd: >
k3s kubectl get nodes -l "node-role.kubernetes.io/control-plane=true" -o=jsonpath="{.items[*].metadata.name}" k3s kubectl get nodes -l "node-role.kubernetes.io/control-plane=true" -o=jsonpath="{.items[*].metadata.name}"
@@ -95,28 +90,6 @@
retries: 20 retries: 20
delay: 10 delay: 10
changed_when: false changed_when: false
always:
- name: Kill the temporary init service
ansible.builtin.systemd:
name: k3s-init
state: stopped
failed_when: false
- name: Copy K3s service file
ansible.builtin.template:
src: "k3s-server.service.j2"
dest: "{{ systemd_dir }}/k3s-server.service"
owner: root
group: root
mode: 0644
register: k3s_service
- name: Enable and check K3s service
ansible.builtin.systemd:
name: k3s-server
daemon_reload: true
state: restarted
enabled: true
- name: Create symlinks - name: Create symlinks
ansible.builtin.file: ansible.builtin.file:

View File

@@ -0,0 +1,28 @@
[Unit]
Description=Lightweight Kubernetes
Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target
[Install]
WantedBy=multi-user.target
[Service]
Type=notify
EnvironmentFile=-/etc/default/%N
EnvironmentFile=-/etc/sysconfig/%N
EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=1048576
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
TimeoutStartSec=0
Restart=always
RestartSec=5s
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s server --cluster-init --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }}

View File

@@ -0,0 +1,28 @@
[Unit]
Description=Lightweight Kubernetes
Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target
[Install]
WantedBy=multi-user.target
[Service]
Type=notify
EnvironmentFile=-/etc/default/%N
EnvironmentFile=-/etc/sysconfig/%N
EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=1048576
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
TimeoutStartSec=0
Restart=always
RestartSec=5s
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_server_args }}

View File

@@ -1,13 +1,17 @@
[Unit] [Unit]
Description=Lightweight Kubernetes Description=Lightweight Kubernetes
Documentation=https://k3s.io Documentation=https://k3s.io
Wants=network-online.target
After=network-online.target After=network-online.target
[Install]
WantedBy=multi-user.target
[Service] [Service]
Type=notify Type=notify
ExecStartPre=-/sbin/modprobe br_netfilter EnvironmentFile=-/etc/default/%N
ExecStartPre=-/sbin/modprobe overlay EnvironmentFile=-/etc/sysconfig/%N
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }} EnvironmentFile=-/etc/systemd/system/k3s.service.env
KillMode=process KillMode=process
Delegate=yes Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead # Having non-zero Limit*s causes performance problems due to accounting overhead
@@ -19,6 +23,6 @@ TasksMax=infinity
TimeoutStartSec=0 TimeoutStartSec=0
Restart=always Restart=always
RestartSec=5s RestartSec=5s
ExecStartPre=-/sbin/modprobe br_netfilter
[Install] ExecStartPre=-/sbin/modprobe overlay
WantedBy=multi-user.target ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }}

View File

@@ -4,6 +4,12 @@
state: disabled state: disabled
when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat'] when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat']
- name: Install Dependent Ubuntu Packages
when: ansible_distribution in ['Ubuntu']
ansible.builtin.apt:
name: policycoreutils # Used by install script to restore SELinux context
update_cache: yes
- name: Enable IPv4 forwarding - name: Enable IPv4 forwarding
ansible.posix.sysctl: ansible.posix.sysctl:
name: net.ipv4.ip_forward name: net.ipv4.ip_forward
@@ -53,16 +59,12 @@
validate: 'visudo -cf %s' validate: 'visudo -cf %s'
when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat'] when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat']
- name: Make k3s directory
ansible.builtin.file:
path: "/var/lib/rancher"
mode: 0755
state: directory
- name: Create symlink - name: Create symlink
ansible.builtin.file: ansible.builtin.file:
dest: /var/lib/rancher/k3s dest: /var/lib/rancher/k3s
src: "{{ k3s_server_location }}" src: "{{ k3s_server_location }}"
force: true force: true
state: link state: link
when: k3s_server_location != "/var/lib/rancher/k3s" when:
- k3s_server_location is defined
- k3s_server_location != "/var/lib/rancher/k3s"

View File

@@ -34,7 +34,7 @@
ansible_facts.lsb.description|default("") is match("Debian") ) ansible_facts.lsb.description|default("") is match("Debian") )
- name: Set detected_distribution to ArchLinux (ARM64) - name: Set detected_distribution to ArchLinux (ARM64)
set_fact: ansible.builtin.set_fact:
detected_distribution: Archlinux detected_distribution: Archlinux
when: when:
- ansible_facts.architecture is search("aarch64") - ansible_facts.architecture is search("aarch64")

View File

@@ -1,14 +1,15 @@
--- ---
- name: Enable cgroup via boot commandline if not already enabled for Archlinux - name: Enable cgroup via boot commandline if not already enabled for Archlinux
lineinfile: ansible.builtin.lineinfile:
path: /boot/boot.txt path: /boot/boot.txt
search_string: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}" search_string: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}"
line: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}" cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory line: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}" cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory
register: kernel_cmdline_cgroup register: kernel_cmdline_cgroup
- name: Create - name: Create
shell: ./mkscr ansible.builtin.command: ./mkscr
args: args:
chdir: /boot chdir: /boot
notify: reboot notify: reboot
when: kernel_cmdline_cgroup.changed changed_when: false
when: kernel_cmdline_cgroup.changed # noqa: no-handler