diff --git a/.ansible-lint b/.ansible-lint
index 940e20e..8024244 100644
--- a/.ansible-lint
+++ b/.ansible-lint
@@ -13,5 +13,8 @@ exclude_paths:
   - 'molecule/**/prepare.yml'
   - 'molecule/**/reset.yml'
 
+  # The file was generated by ansible-galaxy - don't mess with it.
+  - 'galaxy.yml'
+
 skip_list:
   - 'fqcn-builtins'
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 986c9cb..ad49c18 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -37,6 +37,11 @@ systemd_dir: ""
 flannel_iface: ""
 
+#calico_iface: ""
+calico_ebpf: ""
+calico_cidr: ""
+calico_tag: ""
+
 apiserver_endpoint: ""
 
 k3s_token: "NA"
@@ -46,6 +51,9 @@ extra_agent_args: ""
 
 kube_vip_tag_version: ""
 
+kube_vip_cloud_provider_tag_version: ""
+kube_vip_lb_ip_range: ""
+
 metal_lb_speaker_tag_version: ""
 metal_lb_controller_tag_version: ""
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 53faada..29a02b6 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -9,3 +9,18 @@ updates:
     ignore:
       - dependency-name: "*"
         update-types: ["version-update:semver-major"]
+
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "daily"
+    rebase-strategy: "auto"
+
+  - package-ecosystem: "docker"
+    directory: "/"
+    schedule:
+      interval: "daily"
+    rebase-strategy: "auto"
+    ignore:
+      - dependency-name: "*"
+        update-types: ["version-update:semver-major"]
diff --git a/.github/workflows/cache.yml b/.github/workflows/cache.yml
new file mode 100644
index 0000000..a6d005a
--- /dev/null
+++ b/.github/workflows/cache.yml
@@ -0,0 +1,42 @@
+---
+name: "Cache"
+on:
+  workflow_call:
+jobs:
+  molecule:
+    name: cache
+    runs-on: self-hosted
+    env:
+      PYTHON_VERSION: "3.11"
+
+    steps:
+      - name: Check out the codebase
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v3 4.1.1
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # 5.0.0
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+          cache: 'pip' # caching pip dependencies
+
+      - name: Cache Vagrant boxes
+        id: cache-vagrant
+        uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # 4.0
+        with:
+          lookup-only: true  # if it exists, we don't need to restore and can skip the next step
+          path: |
+            ~/.vagrant.d/boxes
+          key: vagrant-boxes-${{ hashFiles('**/molecule.yml') }}
+          restore-keys: |
+            vagrant-boxes
+
+      - name: Download Vagrant boxes for all scenarios
+        # To save some cache space, all scenarios share the same cache key.
+        # On the other hand, this means that the cache contents should be
+        # the same across all scenarios. This step ensures that.
+ if: steps.cache-vagrant.outputs.cache-hit != 'true' # only run if false since this is just a cache step + run: | + ./.github/download-boxes.sh + vagrant box list diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 54be8ff..1d823c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,14 +2,26 @@ name: "CI" on: pull_request: - push: - branches: - - master + types: + - opened + - synchronize paths-ignore: - - '**/README.md' + - '**/.gitignore' + - '**/FUNDING.yml' + - '**/host.ini' + - '**/*.md' + - '**/.editorconfig' + - '**/ansible.example.cfg' + - '**/deploy.sh' + - '**/LICENSE' + - '**/reboot.sh' + - '**/reset.sh' jobs: + pre: + uses: ./.github/workflows/cache.yml lint: uses: ./.github/workflows/lint.yml + needs: [pre] test: uses: ./.github/workflows/test.yml - needs: [lint] + needs: [pre, lint] diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b43f5bb..6a4020a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -5,37 +5,27 @@ on: jobs: pre-commit-ci: name: Pre-Commit - runs-on: ubuntu-latest + runs-on: self-hosted env: PYTHON_VERSION: "3.11" steps: - name: Check out the codebase - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v3 2.5.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v3 4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # 2.3.3 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # 5.0.0 with: python-version: ${{ env.PYTHON_VERSION }} cache: 'pip' # caching pip dependencies - - name: Cache pip - uses: actions/cache@9b0c1fce7a93df8e3bb8926b0d6e9d89e92f20a7 # 3.0.11 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('./requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Cache Ansible - uses: actions/cache@9b0c1fce7a93df8e3bb8926b0d6e9d89e92f20a7 # 3.0.11 + - name: Restore Ansible cache + uses: actions/cache/restore@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # 4.0 with: path: ~/.ansible/collections - key: ${{ runner.os }}-ansible-${{ hashFiles('collections/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-ansible- + key: ansible-${{ hashFiles('collections/requirements.yml') }} - name: Install dependencies run: | @@ -47,21 +37,17 @@ jobs: python3 -m pip install -r requirements.txt echo "::endgroup::" - echo "::group::Install Ansible role requirements from collections/requirements.yml" - ansible-galaxy install -r collections/requirements.yml - echo "::endgroup::" - - name: Run pre-commit uses: pre-commit/action@646c83fcd040023954eafda54b4db0192ce70507 # 3.0.0 ensure-pinned-actions: name: Ensure SHA Pinned Actions - runs-on: ubuntu-latest + runs-on: self-hosted steps: - name: Checkout code - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v3 2.5.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v3 4.1.1 - name: Ensure SHA pinned actions - uses: zgosalvez/github-actions-ensure-sha-pinned-actions@af2eb3226618e2494e3d9084f515ad6dcf16e229 # 2.0.1 + uses: zgosalvez/github-actions-ensure-sha-pinned-actions@ba37328d4ea95eaf8b3bd6c6cef308f709a5f2ec # 3.0.3 with: allowlist: | aws-actions/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 10b6135..c93e9f4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,23 +5,50 @@ on: jobs: molecule: name: Molecule - runs-on: macos-12 + 
runs-on: self-hosted strategy: matrix: scenario: - default - ipv6 - single_node + - calico + - kube-vip fail-fast: false env: PYTHON_VERSION: "3.11" steps: - name: Check out the codebase - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v3 2.5.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v3 4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} + # these steps are necessary if not using ephemeral nodes + - name: Delete old Vagrant box versions + if: always() # do this even if a step before has failed + run: vagrant box prune --force + + - name: Remove all local Vagrant boxes + if: always() # do this even if a step before has failed + run: if vagrant box list 2>/dev/null; then vagrant box list | cut -f 1 -d ' ' | xargs -L 1 vagrant box remove -f 2>/dev/null && echo "All Vagrant boxes removed." || echo "No Vagrant boxes found."; else echo "No Vagrant boxes found."; fi + + - name: Remove all Virtualbox VMs + if: always() # do this even if a step before has failed + run: VBoxManage list vms | awk -F'"' '{print $2}' | xargs -I {} VBoxManage unregistervm --delete "{}" + + - name: Remove all Virtualbox HDs + if: always() # do this even if a step before has failed + run: VBoxManage list hdds | awk -F':' '/^UUID:/ {print $2}' | xargs -I {} VBoxManage closemedium disk "{}" --delete + + - name: Remove all Virtualbox Networks + if: always() # do this even if a step before has failed + run: VBoxManage list hostonlyifs | grep '^Name:' | awk '{print $2}' | grep '^vboxnet' | xargs -I {} VBoxManage hostonlyif remove {} + + - name: Remove Virtualbox network config + if: always() # do this even if a step before has failed + run: sudo rm /etc/vbox/networks.conf || true + - name: Configure VirtualBox run: |- sudo mkdir -p /etc/vbox @@ -30,35 +57,19 @@ jobs: * fdad:bad:ba55::/64 EOF - - name: Cache pip - uses: actions/cache@9b0c1fce7a93df8e3bb8926b0d6e9d89e92f20a7 # 3.0.11 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('./requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Cache Vagrant boxes - uses: actions/cache@9b0c1fce7a93df8e3bb8926b0d6e9d89e92f20a7 # 3.0.11 - with: - path: | - ~/.vagrant.d/boxes - key: vagrant-boxes-${{ hashFiles('**/molecule.yml') }} - restore-keys: | - vagrant-boxes - - - name: Download Vagrant boxes for all scenarios - # To save some cache space, all scenarios share the same cache key. - # On the other hand, this means that the cache contents should be - # the same across all scenarios. This step ensures that. 
- run: ./.github/download-boxes.sh - - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@75f3110429a8c05be0e1bf360334e4cced2b63fa # 2.3.3 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # 5.0.0 with: python-version: ${{ env.PYTHON_VERSION }} cache: 'pip' # caching pip dependencies + - name: Restore vagrant Boxes cache + uses: actions/cache/restore@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # 4.0 + with: + path: ~/.vagrant.d/boxes + key: vagrant-boxes-${{ hashFiles('**/molecule.yml') }} + fail-on-cache-miss: true + - name: Install dependencies run: | echo "::group::Upgrade pip" @@ -75,18 +86,40 @@ jobs: env: ANSIBLE_K3S_LOG_DIR: ${{ runner.temp }}/logs/k3s-ansible/${{ matrix.scenario }} ANSIBLE_SSH_RETRIES: 4 - ANSIBLE_TIMEOUT: 60 + ANSIBLE_TIMEOUT: 120 PY_COLORS: 1 ANSIBLE_FORCE_COLOR: 1 + # these steps are necessary if not using ephemeral nodes + - name: Delete old Vagrant box versions + if: always() # do this even if a step before has failed + run: vagrant box prune --force + + - name: Remove all local Vagrant boxes + if: always() # do this even if a step before has failed + run: if vagrant box list 2>/dev/null; then vagrant box list | cut -f 1 -d ' ' | xargs -L 1 vagrant box remove -f 2>/dev/null && echo "All Vagrant boxes removed." || echo "No Vagrant boxes found."; else echo "No Vagrant boxes found."; fi + + - name: Remove all Virtualbox VMs + if: always() # do this even if a step before has failed + run: VBoxManage list vms | awk -F'"' '{print $2}' | xargs -I {} VBoxManage unregistervm --delete "{}" + + - name: Remove all Virtualbox HDs + if: always() # do this even if a step before has failed + run: VBoxManage list hdds | awk -F':' '/^UUID:/ {print $2}' | xargs -I {} VBoxManage closemedium disk "{}" --delete + + - name: Remove all Virtualbox Networks + if: always() # do this even if a step before has failed + run: VBoxManage list hostonlyifs | grep '^Name:' | awk '{print $2}' | grep '^vboxnet' | xargs -I {} VBoxManage hostonlyif remove {} + + - name: Remove Virtualbox network config + if: always() # do this even if a step before has failed + run: sudo rm /etc/vbox/networks.conf || true + - name: Upload log files if: always() # do this even if a step before has failed - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # 3.1.1 + uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # 4.3.0 with: name: logs path: | ${{ runner.temp }}/logs - - - name: Delete old box versions - if: always() # do this even if a step before has failed - run: vagrant box prune --force + overwrite: true diff --git a/.gitignore b/.gitignore index 78f3d0b..89c5d4d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .env/ *.log ansible.cfg +kubeconfig diff --git a/.yamllint b/.yamllint index 8f19687..a60b44e 100644 --- a/.yamllint +++ b/.yamllint @@ -6,4 +6,6 @@ rules: max: 120 level: warning truthy: - allowed-values: ['true', 'false', 'yes', 'no'] + allowed-values: ['true', 'false'] +ignore: + - galaxy.yml diff --git a/README.md b/README.md index cdb24fd..8775d79 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,28 @@ You can find more information about it [here](molecule/README.md). This repo uses `pre-commit` and `pre-commit-hooks` to lint and fix common style and syntax errors. Be sure to install python packages and then run `pre-commit install`. For more information, see [pre-commit](https://pre-commit.com/) +## 🌌 Ansible Galaxy + +This collection can now be used in larger ansible projects. 
+
+Instructions:
+
+- create or modify a file `collections/requirements.yml` in your project
+
+```yml
+collections:
+  - name: ansible.utils
+  - name: community.general
+  - name: ansible.posix
+  - name: kubernetes.core
+  - name: https://github.com/techno-tim/k3s-ansible.git
+    type: git
+    version: master
+```
+
+- install via `ansible-galaxy collection install -r ./collections/requirements.yml`
+- every role is now available via the prefix `techno_tim.k3s_ansible.`, e.g. `techno_tim.k3s_ansible.lxc`
+
 ## Thanks 🤝
 
 This repo is really standing on the shoulders of giants. Thank you to all those who have contributed and thanks to these repos for code and ideas:
diff --git a/galaxy.yml b/galaxy.yml
new file mode 100644
index 0000000..0f9b196
--- /dev/null
+++ b/galaxy.yml
@@ -0,0 +1,81 @@
+### REQUIRED
+# The namespace of the collection. This can be a company/brand/organization or product namespace under which all
+# content lives. May only contain alphanumeric lowercase characters and underscores. Namespaces cannot start with
+# underscores or numbers and cannot contain consecutive underscores
+namespace: techno_tim
+
+# The name of the collection. Has the same character restrictions as 'namespace'
+name: k3s_ansible
+
+# The version of the collection. Must be compatible with semantic versioning
+version: 1.0.0
+
+# The path to the Markdown (.md) readme file. This path is relative to the root of the collection
+readme: README.md
+
+# A list of the collection's content authors. Can be just the name or in the format 'Full Name (url)
+# @nicks:irc/im.site#channel'
+authors:
+- your name
+
+
+### OPTIONAL but strongly recommended
+# A short summary description of the collection
+description: >
+  The easiest way to bootstrap a self-hosted High Availability Kubernetes
+  cluster. A fully automated HA k3s etcd install with kube-vip, MetalLB,
+  and more.
+
+# Either a single license or a list of licenses for content inside of a collection. Ansible Galaxy currently only
+# accepts L(SPDX,https://spdx.org/licenses/) licenses. This key is mutually exclusive with 'license_file'
+license:
+- Apache-2.0
+
+
+# A list of tags you want to associate with the collection for indexing/searching. A tag name has the same character
+# requirements as 'namespace' and 'name'
+tags:
+  - etcd
+  - high-availability
+  - k8s
+  - k3s
+  - k3s-cluster
+  - kube-vip
+  - kubernetes
+  - metallb
+  - rancher
+
+# Collections that this collection requires to be installed for it to be usable. The key of the dict is the
+# collection label 'namespace.name'. The value is a version range
+# L(specifiers,https://python-semanticversion.readthedocs.io/en/latest/#requirement-specification). Multiple version
+# range specifiers can be set and are separated by ','
+dependencies:
+  ansible.utils: '*'
+  ansible.posix: '*'
+  community.general: '*'
+  kubernetes.core: '*'
+
+# The URL of the originating SCM repository
+repository: https://github.com/techno-tim/k3s-ansible
+
+# The URL to any online docs
+documentation: https://github.com/techno-tim/k3s-ansible
+
+# The URL to the homepage of the collection/project
+homepage: https://www.youtube.com/watch?v=CbkEWcUZ7zM
+
+# The URL to the collection issue tracker
+issues: https://github.com/techno-tim/k3s-ansible/issues
+
+# A list of file glob-like patterns used to filter any files or directories that should not be included in the build
+# artifact. A pattern is matched from the relative path of the file or directory of the collection directory. This
+# uses 'fnmatch' to match the files or directories. Some directories and files like 'galaxy.yml', '*.pyc', '*.retry',
+# and '.git' are always filtered. Mutually exclusive with 'manifest'
+build_ignore: []
+
+# A dict controlling use of manifest directives used in building the collection artifact. The key 'directives' is a
+# list of MANIFEST.in style
+# L(directives,https://packaging.python.org/en/latest/guides/using-manifest-in/#manifest-in-commands). The key
+# 'omit_default_directives' is a boolean that controls whether the default directives are used. Mutually exclusive
+# with 'build_ignore'
+# manifest: null
diff --git a/inventory/sample/group_vars/all.yml b/inventory/sample/group_vars/all.yml
index 5b923f7..f6f8120 100644
--- a/inventory/sample/group_vars/all.yml
+++ b/inventory/sample/group_vars/all.yml
@@ -1,5 +1,5 @@
 ---
-k3s_version: v1.25.16+k3s4
+k3s_version: v1.29.0+k3s1
 # this is the user that has ssh access to these machines
 ansible_user: ansibleuser
 systemd_dir: /etc/systemd/system
@@ -10,6 +10,12 @@ system_timezone: "Your/Timezone"
 # interface which will be used for flannel
 flannel_iface: "eth0"
 
+# uncomment calico_iface to use tigera operator/calico cni instead of flannel https://docs.tigera.io/calico/latest/about
+# calico_iface: "eth0"
+calico_ebpf: false  # use eBPF dataplane instead of iptables
+calico_cidr: "10.52.0.0/16"  # calico cluster pod cidr pool
+calico_tag: "v3.27.0"  # calico version tag
+
 # apiserver_endpoint is virtual ip-address which will be configured on each master
 apiserver_endpoint: "192.168.30.222"
@@ -20,28 +26,42 @@ k3s_token: "some-SUPER-DEDEUPER-secret-password"
 # The IP on which the node is reachable in the cluster.
 # Here, a sensible default is provided, you can still override
 # it for each of your hosts, though.
-k3s_node_ip: '{{ ansible_facts[flannel_iface]["ipv4"]["address"] }}'
+k3s_node_ip: "{{ ansible_facts[(calico_iface | default(flannel_iface))]['ipv4']['address'] }}"
 
 # Disable the taint manually by setting: k3s_master_taint = false
 k3s_master_taint: "{{ true if groups['node'] | default([]) | length >= 1 else false }}"
 
 # these arguments are recommended for servers as well as agents:
 extra_args: >-
-  --flannel-iface={{ flannel_iface }}
+  {{ '--flannel-iface=' + flannel_iface if calico_iface is not defined else '' }}
   --node-ip={{ k3s_node_ip }}
 
 # change these to your liking, the only required are: --disable servicelb, --tls-san {{ apiserver_endpoint }}
+# the contents of the if block are also required if using calico
 extra_server_args: >-
   {{ extra_args }}
   {{ '--node-taint node-role.kubernetes.io/master=true:NoSchedule' if k3s_master_taint else '' }}
+  {% if calico_iface is defined %}
+  --flannel-backend=none
+  --disable-network-policy
+  --cluster-cidr={{ calico_cidr | default('10.52.0.0/16') }}
+  {% endif %}
   --tls-san {{ apiserver_endpoint }}
   --disable servicelb
   --disable traefik
+
 extra_agent_args: >-
   {{ extra_args }}
 
 # image tag for kube-vip
-kube_vip_tag_version: "v0.5.12"
+kube_vip_tag_version: "v0.6.4"
+
+# tag for kube-vip-cloud-provider manifest
+# kube_vip_cloud_provider_tag_version: "main"
+
+# kube-vip ip range for load balancer
+# (uncomment to use kube-vip for services instead of MetalLB)
+# kube_vip_lb_ip_range: "192.168.30.80-192.168.30.90"
 
 # metallb type frr or native
 metal_lb_type: "native"
@@ -55,8 +75,8 @@ metal_lb_mode: "layer2"
 # metal_lb_bgp_peer_address: "192.168.30.1"
 
 # image tag for metal lb
-metal_lb_speaker_tag_version: "v0.13.9"
-metal_lb_controller_tag_version: "v0.13.9"
+metal_lb_speaker_tag_version: "v0.13.12"
+metal_lb_controller_tag_version: "v0.13.12"
 
 # metallb ip range for load balancer
 metal_lb_ip_range: "192.168.30.80-192.168.30.90"
@@ -66,9 +86,9 @@ metal_lb_ip_range: "192.168.30.80-192.168.30.90"
 # Please read https://gist.github.com/triangletodd/02f595cd4c0dc9aac5f7763ca2264185 before using this.
 # Most notably, your containers must be privileged, and must not have nesting set to true.
 # Please note this script disables most of the security of lxc containers, with the trade off being that lxc
-# containers are significantly more resource efficent compared to full VMs.
+# containers are significantly more resource efficient compared to full VMs.
 # Mixing and matching VMs and lxc containers is not supported, ymmv if you want to do this.
-# I would only really recommend using this if you have partiularly low powered proxmox nodes where the overhead of
+# I would only really recommend using this if you have particularly low powered proxmox nodes where the overhead of
 # VMs would use a significant portion of your available resources.
 proxmox_lxc_configure: false
 # the user that you would use to ssh into the host, for example if you run ssh some-user@my-proxmox-host,
diff --git a/molecule/README.md b/molecule/README.md
index 25cfdad..f8d6b70 100644
--- a/molecule/README.md
+++ b/molecule/README.md
@@ -13,6 +13,10 @@ We have these scenarios:
   To save a bit of test time, this cluster is _not_ highly available, it consists of only one control and one worker node.
 - **single_node**:
   Very similar to the default scenario, but uses only a single node for all cluster functionality.
+- **calico**:
+  The same as single_node, but uses the Calico CNI instead of flannel.
+- **kube-vip**:
+  The same as single_node, but uses kube-vip as the service load balancer instead of MetalLB.
 
 ## How to execute
diff --git a/molecule/calico/molecule.yml b/molecule/calico/molecule.yml
new file mode 100644
index 0000000..f9487f3
--- /dev/null
+++ b/molecule/calico/molecule.yml
@@ -0,0 +1,49 @@
+---
+dependency:
+  name: galaxy
+driver:
+  name: vagrant
+platforms:
+  - name: control1
+    box: generic/ubuntu2204
+    memory: 4096
+    cpus: 4
+    config_options:
+      # We currently can not use public-key based authentication on Ubuntu 22.04,
+      # see: https://github.com/chef/bento/issues/1405
+      ssh.username: "vagrant"
+      ssh.password: "vagrant"
+    groups:
+      - k3s_cluster
+      - master
+    interfaces:
+      - network_name: private_network
+        ip: 192.168.30.62
+provisioner:
+  name: ansible
+  env:
+    ANSIBLE_VERBOSITY: 1
+  playbooks:
+    converge: ../resources/converge.yml
+    side_effect: ../resources/reset.yml
+    verify: ../resources/verify.yml
+  inventory:
+    links:
+      group_vars: ../../inventory/sample/group_vars
+scenario:
+  test_sequence:
+    - dependency
+    - cleanup
+    - destroy
+    - syntax
+    - create
+    - prepare
+    - converge
+    # idempotence is not possible with the playbook in its current form.
+    - verify
+    # We are repurposing side_effect here to test the reset playbook.
+    # This is why we do not run it before verify (which tests the cluster),
+    # but after the verify step.
+    - side_effect
+    - cleanup
+    - destroy
diff --git a/molecule/calico/overrides.yml b/molecule/calico/overrides.yml
new file mode 100644
index 0000000..8a78c51
--- /dev/null
+++ b/molecule/calico/overrides.yml
@@ -0,0 +1,16 @@
+---
+- name: Apply overrides
+  hosts: all
+  tasks:
+    - name: Override host variables
+      ansible.builtin.set_fact:
+        # See:
+        # https://github.com/flannel-io/flannel/blob/67d603aaf45ef80f5dd39f43714fc5e6f8a637eb/Documentation/troubleshooting.md#Vagrant
+        calico_iface: eth1
+
+        # The test VMs might be a bit slow, so we give them more time to join the cluster:
+        retry_count: 45
+
+        # Make sure that our IP ranges do not collide with those of the other scenarios
+        apiserver_endpoint: "192.168.30.224"
+        metal_lb_ip_range: "192.168.30.100-192.168.30.109"
diff --git a/molecule/default/molecule.yml b/molecule/default/molecule.yml
index 4a07503..733edd1 100644
--- a/molecule/default/molecule.yml
+++ b/molecule/default/molecule.yml
@@ -7,7 +7,7 @@ platforms:
   - name: control1
     box: generic/ubuntu2204
-    memory: 2048
+    memory: 1024
     cpus: 2
     groups:
       - k3s_cluster
@@ -22,8 +22,8 @@ platforms:
       ssh.password: "vagrant"
 
   - name: control2
-    box: generic/debian11
-    memory: 2048
+    box: generic/debian12
+    memory: 1024
     cpus: 2
     groups:
       - k3s_cluster
@@ -34,7 +34,7 @@ platforms:
 
   - name: control3
     box: generic/rocky9
-    memory: 2048
+    memory: 1024
     cpus: 2
     groups:
       - k3s_cluster
@@ -45,7 +45,7 @@ platforms:
 
   - name: node1
     box: generic/ubuntu2204
-    memory: 2048
+    memory: 1024
     cpus: 2
     groups:
       - k3s_cluster
@@ -61,7 +61,7 @@ platforms:
 
   - name: node2
     box: generic/rocky9
-    memory: 2048
+    memory: 1024
     cpus: 2
     groups:
       - k3s_cluster
@@ -72,6 +72,8 @@ platforms:
 
 provisioner:
   name: ansible
+  env:
+    ANSIBLE_VERBOSITY: 1
   playbooks:
     converge: ../resources/converge.yml
     side_effect: ../resources/reset.yml
@@ -82,7 +84,6 @@ provisioner:
 scenario:
   test_sequence:
     - dependency
-    - lint
     - cleanup
     - destroy
diff --git a/molecule/default/prepare.yml b/molecule/default/prepare.yml
index 17da4dd..044aa79 100644
--- a/molecule/default/prepare.yml
+++ b/molecule/default/prepare.yml
@@ -17,6 +17,6 @@
 # and security needs.
ansible.builtin.systemd: name: firewalld - enabled: no + enabled: false state: stopped become: true diff --git a/molecule/ipv6/molecule.yml b/molecule/ipv6/molecule.yml index 2ad6423..28f425b 100644 --- a/molecule/ipv6/molecule.yml +++ b/molecule/ipv6/molecule.yml @@ -6,7 +6,7 @@ driver: platforms: - name: control1 box: generic/ubuntu2204 - memory: 2048 + memory: 1024 cpus: 2 groups: - k3s_cluster @@ -22,7 +22,7 @@ platforms: - name: control2 box: generic/ubuntu2204 - memory: 2048 + memory: 1024 cpus: 2 groups: - k3s_cluster @@ -38,7 +38,7 @@ platforms: - name: node1 box: generic/ubuntu2204 - memory: 2048 + memory: 1024 cpus: 2 groups: - k3s_cluster @@ -53,6 +53,8 @@ platforms: ssh.password: "vagrant" provisioner: name: ansible + env: + ANSIBLE_VERBOSITY: 1 playbooks: converge: ../resources/converge.yml side_effect: ../resources/reset.yml @@ -63,7 +65,6 @@ provisioner: scenario: test_sequence: - dependency - - lint - cleanup - destroy - syntax diff --git a/molecule/kube-vip/molecule.yml b/molecule/kube-vip/molecule.yml new file mode 100644 index 0000000..f9487f3 --- /dev/null +++ b/molecule/kube-vip/molecule.yml @@ -0,0 +1,49 @@ +--- +dependency: + name: galaxy +driver: + name: vagrant +platforms: + - name: control1 + box: generic/ubuntu2204 + memory: 4096 + cpus: 4 + config_options: + # We currently can not use public-key based authentication on Ubuntu 22.04, + # see: https://github.com/chef/bento/issues/1405 + ssh.username: "vagrant" + ssh.password: "vagrant" + groups: + - k3s_cluster + - master + interfaces: + - network_name: private_network + ip: 192.168.30.62 +provisioner: + name: ansible + env: + ANSIBLE_VERBOSITY: 1 + playbooks: + converge: ../resources/converge.yml + side_effect: ../resources/reset.yml + verify: ../resources/verify.yml + inventory: + links: + group_vars: ../../inventory/sample/group_vars +scenario: + test_sequence: + - dependency + - cleanup + - destroy + - syntax + - create + - prepare + - converge + # idempotence is not possible with the playbook in its current form. + - verify + # We are repurposing side_effect here to test the reset playbook. + # This is why we do not run it before verify (which tests the cluster), + # but after the verify step. 
+ - side_effect + - cleanup + - destroy diff --git a/molecule/kube-vip/overrides.yml b/molecule/kube-vip/overrides.yml new file mode 100644 index 0000000..c64b1f3 --- /dev/null +++ b/molecule/kube-vip/overrides.yml @@ -0,0 +1,17 @@ +--- +- name: Apply overrides + hosts: all + tasks: + - name: Override host variables + ansible.builtin.set_fact: + # See: + # https://github.com/flannel-io/flannel/blob/67d603aaf45ef80f5dd39f43714fc5e6f8a637eb/Documentation/troubleshooting.md#Vagrant + flannel_iface: eth1 + + # The test VMs might be a bit slow, so we give them more time to join the cluster: + retry_count: 45 + + # Make sure that our IP ranges do not collide with those of the other scenarios + apiserver_endpoint: "192.168.30.225" + # Use kube-vip instead of MetalLB + kube_vip_lb_ip_range: "192.168.30.110-192.168.30.119" diff --git a/molecule/resources/verify_from_outside/tasks/test/deploy-example.yml b/molecule/resources/verify_from_outside/tasks/test/deploy-example.yml index 61c4cec..385c7cc 100644 --- a/molecule/resources/verify_from_outside/tasks/test/deploy-example.yml +++ b/molecule/resources/verify_from_outside/tasks/test/deploy-example.yml @@ -35,7 +35,7 @@ - name: Assert that the nginx welcome page is available ansible.builtin.uri: url: http://{{ ip | ansible.utils.ipwrap }}:{{ port_ }}/ - return_content: yes + return_content: true register: result failed_when: "'Welcome to nginx!' not in result.content" vars: diff --git a/molecule/single_node/molecule.yml b/molecule/single_node/molecule.yml index 1a7ed84..276b6d3 100644 --- a/molecule/single_node/molecule.yml +++ b/molecule/single_node/molecule.yml @@ -21,6 +21,8 @@ platforms: ip: 192.168.30.50 provisioner: name: ansible + env: + ANSIBLE_VERBOSITY: 1 playbooks: converge: ../resources/converge.yml side_effect: ../resources/reset.yml @@ -31,7 +33,6 @@ provisioner: scenario: test_sequence: - dependency - - lint - cleanup - destroy - syntax diff --git a/reboot.yml b/reboot.yml index a970665..ffba507 100644 --- a/reboot.yml +++ b/reboot.yml @@ -1,7 +1,7 @@ --- - name: Reboot k3s_cluster hosts: k3s_cluster - gather_facts: yes + gather_facts: true tasks: - name: Reboot the nodes (and Wait upto 5 mins max) become: true diff --git a/requirements.in b/requirements.in index 715153b..e0eac29 100644 --- a/requirements.in +++ b/requirements.in @@ -1,10 +1,10 @@ -ansible-core>=2.13.5 +ansible-core>=2.16.2 jmespath>=1.0.1 -jsonpatch>=1.32 -kubernetes>=25.3.0 -molecule-vagrant>=1.0.0 -molecule>=4.0.3 -netaddr>=0.8.0 -pre-commit>=2.20.0 -pre-commit-hooks>=1.3.1 -pyyaml>=6.0 +jsonpatch>=1.33 +kubernetes>=29.0.0 +molecule-plugins[vagrant] +molecule>=6.0.3 +netaddr>=0.10.1 +pre-commit>=3.6.0 +pre-commit-hooks>=4.5.0 +pyyaml>=6.0.1 diff --git a/requirements.txt b/requirements.txt index 9dcf161..48773d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,174 +4,165 @@ # # pip-compile requirements.in # -ansible-compat==3.0.1 +ansible-compat==4.1.11 # via molecule -ansible-core==2.15.4 +ansible-core==2.16.2 # via # -r requirements.in # ansible-compat -arrow==1.2.3 - # via jinja2-time -attrs==22.1.0 - # via jsonschema -binaryornot==0.4.4 - # via cookiecutter -cachetools==5.2.0 + # molecule +attrs==23.2.0 + # via + # jsonschema + # referencing +bracex==2.4 + # via wcmatch +cachetools==5.3.2 # via google-auth -certifi==2022.9.24 +certifi==2023.11.17 # via # kubernetes # requests -cffi==1.15.1 +cffi==1.16.0 # via cryptography -cfgv==3.3.1 +cfgv==3.4.0 # via pre-commit -chardet==5.0.0 - # via binaryornot -charset-normalizer==2.1.1 +charset-normalizer==3.3.2 # 
via requests -click==8.1.3 +click==8.1.7 # via # click-help-colors - # cookiecutter # molecule -click-help-colors==0.9.1 +click-help-colors==0.9.4 # via molecule -commonmark==0.9.1 - # via rich -cookiecutter==2.1.1 - # via molecule -cryptography==38.0.3 +cryptography==41.0.7 # via ansible-core -distlib==0.3.6 +distlib==0.3.8 # via virtualenv -distro==1.8.0 - # via selinux enrich==1.2.7 # via molecule -filelock==3.8.0 +filelock==3.13.1 # via virtualenv -google-auth==2.14.0 +google-auth==2.26.2 # via kubernetes -identify==2.5.8 +identify==2.5.33 # via pre-commit -idna==3.4 +idna==3.6 # via requests -jinja2==3.1.2 +jinja2==3.1.3 # via # ansible-core - # cookiecutter - # jinja2-time # molecule - # molecule-vagrant -jinja2-time==0.2.0 - # via cookiecutter jmespath==1.0.1 # via -r requirements.in jsonpatch==1.33 # via -r requirements.in -jsonpointer==2.3 +jsonpointer==2.4 # via jsonpatch -jsonschema==4.17.0 +jsonschema==4.21.1 # via # ansible-compat # molecule -kubernetes==25.3.0 +jsonschema-specifications==2023.12.1 + # via jsonschema +kubernetes==29.0.0 # via -r requirements.in -markupsafe==2.1.1 +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.4 # via jinja2 -molecule==4.0.4 +mdurl==0.1.2 + # via markdown-it-py +molecule==6.0.3 # via # -r requirements.in - # molecule-vagrant -molecule-vagrant==1.0.0 + # molecule-plugins +molecule-plugins[vagrant]==23.5.0 # via -r requirements.in -netaddr==0.10.0 +netaddr==0.10.1 # via -r requirements.in -nodeenv==1.7.0 +nodeenv==1.8.0 # via pre-commit oauthlib==3.2.2 - # via requests-oauthlib -packaging==21.3 + # via + # kubernetes + # requests-oauthlib +packaging==23.2 # via # ansible-compat # ansible-core # molecule -platformdirs==2.5.2 +platformdirs==4.1.0 # via virtualenv -pluggy==1.0.0 +pluggy==1.3.0 # via molecule -pre-commit==2.21.0 +pre-commit==3.6.0 # via -r requirements.in pre-commit-hooks==4.5.0 # via -r requirements.in -pyasn1==0.4.8 +pyasn1==0.5.1 # via # pyasn1-modules # rsa -pyasn1-modules==0.2.8 +pyasn1-modules==0.3.0 # via google-auth pycparser==2.21 # via cffi -pygments==2.13.0 +pygments==2.17.2 # via rich -pyparsing==3.0.9 - # via packaging -pyrsistent==0.19.2 - # via jsonschema python-dateutil==2.8.2 - # via - # arrow - # kubernetes -python-slugify==6.1.2 - # via cookiecutter + # via kubernetes python-vagrant==1.0.0 - # via molecule-vagrant + # via molecule-plugins pyyaml==6.0.1 # via # -r requirements.in # ansible-compat # ansible-core - # cookiecutter # kubernetes # molecule - # molecule-vagrant # pre-commit -requests==2.28.1 +referencing==0.32.1 + # via + # jsonschema + # jsonschema-specifications +requests==2.31.0 # via - # cookiecutter # kubernetes # requests-oauthlib requests-oauthlib==1.3.1 # via kubernetes -resolvelib==0.8.1 +resolvelib==1.0.1 # via ansible-core -rich==12.6.0 +rich==13.7.0 # via # enrich # molecule +rpds-py==0.17.1 + # via + # jsonschema + # referencing rsa==4.9 # via google-auth -ruamel-yaml==0.17.21 +ruamel-yaml==0.18.5 # via pre-commit-hooks -selinux==0.2.1 - # via molecule-vagrant +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml six==1.16.0 # via - # google-auth # kubernetes # python-dateutil subprocess-tee==0.4.1 # via ansible-compat -text-unidecode==1.3 - # via python-slugify -urllib3==1.26.12 +urllib3==2.1.0 # via # kubernetes # requests -virtualenv==20.16.6 +virtualenv==20.25.0 # via pre-commit -websocket-client==1.4.2 +wcmatch==8.5 + # via molecule +websocket-client==1.7.0 # via kubernetes # The following packages are considered to be unsafe in a requirements file: diff --git a/reset.yml b/reset.yml index 
02d4d89..d09c947 100644 --- a/reset.yml +++ b/reset.yml @@ -1,7 +1,7 @@ --- - name: Reset k3s cluster hosts: k3s_cluster - gather_facts: yes + gather_facts: true roles: - role: reset become: true @@ -17,7 +17,7 @@ - name: Revert changes to Proxmox cluster hosts: proxmox gather_facts: true - become: yes + become: true remote_user: "{{ proxmox_lxc_ssh_user }}" roles: - role: reset_proxmox_lxc diff --git a/roles/k3s_agent/tasks/http_proxy.yml b/roles/k3s_agent/tasks/http_proxy.yml index f0a68f6..d4943e2 100644 --- a/roles/k3s_agent/tasks/http_proxy.yml +++ b/roles/k3s_agent/tasks/http_proxy.yml @@ -1,8 +1,8 @@ --- -- name: Create k3s.service.d directory +- name: Create k3s-node.service.d directory file: - path: '{{ systemd_dir }}/k3s.service.d' + path: '{{ systemd_dir }}/k3s-node.service.d' state: directory owner: root group: root @@ -12,7 +12,7 @@ - name: Copy K3s http_proxy conf file template: src: "http_proxy.conf.j2" - dest: "{{ systemd_dir }}/k3s.service.d/http_proxy.conf" + dest: "{{ systemd_dir }}/k3s-node.service.d/http_proxy.conf" owner: root group: root mode: '0755' diff --git a/roles/k3s_agent/tasks/main.yml b/roles/k3s_agent/tasks/main.yml index 395c1ac..3146697 100644 --- a/roles/k3s_agent/tasks/main.yml +++ b/roles/k3s_agent/tasks/main.yml @@ -15,6 +15,6 @@ - name: Enable and check K3s service systemd: name: k3s-node - daemon_reload: yes + daemon_reload: true state: restarted - enabled: yes + enabled: true diff --git a/roles/k3s_server/tasks/kube-vip.yml b/roles/k3s_server/tasks/kube-vip.yml new file mode 100644 index 0000000..d0c74af --- /dev/null +++ b/roles/k3s_server/tasks/kube-vip.yml @@ -0,0 +1,27 @@ +--- +- name: Create manifests directory on first master + file: + path: /var/lib/rancher/k3s/server/manifests + state: directory + owner: root + group: root + mode: 0644 + when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname'] + +- name: Download vip cloud provider manifest to first master + ansible.builtin.get_url: + url: "https://raw.githubusercontent.com/kube-vip/kube-vip-cloud-provider/{{ kube_vip_cloud_provider_tag_version | default('main') }}/manifest/kube-vip-cloud-controller.yaml" # noqa yaml[line-length] + dest: "/var/lib/rancher/k3s/server/manifests/kube-vip-cloud-controller.yaml" + owner: root + group: root + mode: 0644 + when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname'] + +- name: Copy kubevip configMap manifest to first master + template: + src: "kubevip.yaml.j2" + dest: "/var/lib/rancher/k3s/server/manifests/kubevip.yaml" + owner: root + group: root + mode: 0644 + when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname'] diff --git a/roles/k3s_server/tasks/main.yml b/roles/k3s_server/tasks/main.yml index 030dc22..cc5f823 100644 --- a/roles/k3s_server/tasks/main.yml +++ b/roles/k3s_server/tasks/main.yml @@ -6,6 +6,13 @@ state: stopped failed_when: false +# k3s-init won't work if the port is already in use +- name: Stop k3s + systemd: + name: k3s + state: stopped + failed_when: false + - name: Clean previous runs of k3s-init # noqa command-instead-of-module # The systemd module does not support "reset-failed", so we need to resort to command. 
command: systemctl reset-failed k3s-init @@ -22,6 +29,12 @@ - name: Deploy metallb manifest include_tasks: metallb.yml tags: metallb + when: kube_vip_lb_ip_range is not defined + +- name: Deploy kube-vip manifest + include_tasks: kube-vip.yml + tags: kubevip + when: kube_vip_lb_ip_range is defined - name: Init cluster inside the transient k3s-init service command: @@ -29,7 +42,7 @@ -p Restart=on-failure \ --unit=k3s-init \ k3s server {{ server_init_args }}" - creates: "{{ systemd_dir }}/k3s.service" + creates: "{{ systemd_dir }}/k3s-init.service" - name: Verification when: not ansible_check_mode @@ -67,9 +80,9 @@ - name: Enable and check K3s service systemd: name: k3s - daemon_reload: yes + daemon_reload: true state: restarted - enabled: yes + enabled: true - name: Wait for node-token wait_for: @@ -110,7 +123,7 @@ copy: src: /etc/rancher/k3s/k3s.yaml dest: "{{ ansible_user_dir }}/.kube/config" - remote_src: yes + remote_src: true owner: "{{ ansible_user_id }}" mode: "u=rw,g=,o=" diff --git a/roles/k3s_server/templates/kubevip.yaml.j2 b/roles/k3s_server/templates/kubevip.yaml.j2 new file mode 100644 index 0000000..40d8b50 --- /dev/null +++ b/roles/k3s_server/templates/kubevip.yaml.j2 @@ -0,0 +1,13 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kubevip + namespace: kube-system +data: +{% if kube_vip_lb_ip_range is string %} +{# kube_vip_lb_ip_range was used in the legacy way: single string instead of a list #} +{# => transform to list with single element #} +{% set kube_vip_lb_ip_range = [kube_vip_lb_ip_range] %} +{% endif %} + range-global: {{ kube_vip_lb_ip_range | join(',') }} diff --git a/roles/k3s_server/templates/vip.yaml.j2 b/roles/k3s_server/templates/vip.yaml.j2 index 862aee6..1092557 100644 --- a/roles/k3s_server/templates/vip.yaml.j2 +++ b/roles/k3s_server/templates/vip.yaml.j2 @@ -43,7 +43,7 @@ spec: - name: vip_ddns value: "false" - name: svc_enable - value: "false" + value: "{{ 'true' if kube_vip_lb_ip_range is defined else 'false' }}" - name: vip_leaderelection value: "true" - name: vip_leaseduration diff --git a/roles/k3s_server_post/defaults/main.yml b/roles/k3s_server_post/defaults/main.yml index 1c458fa..bbf9629 100644 --- a/roles/k3s_server_post/defaults/main.yml +++ b/roles/k3s_server_post/defaults/main.yml @@ -1,6 +1,6 @@ --- # Timeout to wait for MetalLB services to come up -metal_lb_available_timeout: 120s +metal_lb_available_timeout: 240s # Name of the master group group_name_master: master diff --git a/roles/k3s_server_post/tasks/calico.yml b/roles/k3s_server_post/tasks/calico.yml new file mode 100644 index 0000000..bf8b1d5 --- /dev/null +++ b/roles/k3s_server_post/tasks/calico.yml @@ -0,0 +1,114 @@ +--- +- name: Deploy Calico to cluster + when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname'] + run_once: true + block: + - name: Create manifests directory on first master + file: + path: /tmp/k3s + state: directory + owner: root + group: root + mode: 0755 + + - name: "Download to first master: manifest for Tigera Operator and Calico CRDs" + ansible.builtin.get_url: + url: "https://raw.githubusercontent.com/projectcalico/calico/{{ calico_tag }}/manifests/tigera-operator.yaml" + dest: "/tmp/k3s/tigera-operator.yaml" + owner: root + group: root + mode: 0755 + + - name: Copy Calico custom resources manifest to first master + ansible.builtin.template: + src: "calico.crs.j2" + dest: /tmp/k3s/custom-resources.yaml + owner: root + group: root + mode: 0755 + + - name: Deploy or replace Tigera Operator + block: + - 
name: Deploy Tigera Operator + ansible.builtin.command: + cmd: kubectl create -f /tmp/k3s/tigera-operator.yaml + register: create_operator + changed_when: "'created' in create_operator.stdout" + failed_when: "'Error' in create_operator.stderr and 'already exists' not in create_operator.stderr" + rescue: + - name: Replace existing Tigera Operator + ansible.builtin.command: + cmd: kubectl replace -f /tmp/k3s/tigera-operator.yaml + register: replace_operator + changed_when: "'replaced' in replace_operator.stdout" + failed_when: "'Error' in replace_operator.stderr" + + - name: Wait for Tigera Operator resources + command: >- + k3s kubectl wait {{ item.type }}/{{ item.name }} + --namespace='tigera-operator' + --for=condition=Available=True + --timeout=7s + register: tigera_result + changed_when: false + until: tigera_result is succeeded + retries: 7 + delay: 7 + with_items: + - {name: tigera-operator, type: deployment} + loop_control: + label: "{{ item.type }}/{{ item.name }}" + + - name: Deploy Calico custom resources + block: + - name: Deploy custom resources for Calico + ansible.builtin.command: + cmd: kubectl create -f /tmp/k3s/custom-resources.yaml + register: create_cr + changed_when: "'created' in create_cr.stdout" + failed_when: "'Error' in create_cr.stderr and 'already exists' not in create_cr.stderr" + rescue: + - name: Apply new Calico custom resource manifest + ansible.builtin.command: + cmd: kubectl apply -f /tmp/k3s/custom-resources.yaml + register: apply_cr + changed_when: "'configured' in apply_cr.stdout or 'created' in apply_cr.stdout" + failed_when: "'Error' in apply_cr.stderr" + + - name: Wait for Calico system resources to be available + command: >- + {% if item.type == 'daemonset' %} + k3s kubectl wait pods + --namespace='{{ item.namespace }}' + --selector={{ item.selector }} + --for=condition=Ready + {% else %} + k3s kubectl wait {{ item.type }}/{{ item.name }} + --namespace='{{ item.namespace }}' + --for=condition=Available + {% endif %} + --timeout=7s + register: cr_result + changed_when: false + until: cr_result is succeeded + retries: 30 + delay: 7 + with_items: + - {name: calico-typha, type: deployment, namespace: calico-system} + - {name: calico-kube-controllers, type: deployment, namespace: calico-system} + - {name: csi-node-driver, type: daemonset, selector: 'k8s-app=csi-node-driver', namespace: calico-system} + - {name: calico-node, type: daemonset, selector: 'k8s-app=calico-node', namespace: calico-system} + - {name: calico-apiserver, type: deployment, namespace: calico-apiserver} + loop_control: + label: "{{ item.type }}/{{ item.name }}" + + - name: Patch Felix configuration for eBPF mode + ansible.builtin.command: + cmd: > + kubectl patch felixconfiguration default + --type='merge' + --patch='{"spec": {"bpfKubeProxyIptablesCleanupEnabled": false}}' + register: patch_result + changed_when: "'felixconfiguration.projectcalico.org/default patched' in patch_result.stdout" + failed_when: "'Error' in patch_result.stderr" + when: calico_ebpf diff --git a/roles/k3s_server_post/tasks/main.yml b/roles/k3s_server_post/tasks/main.yml index f88dc08..e9b9842 100644 --- a/roles/k3s_server_post/tasks/main.yml +++ b/roles/k3s_server_post/tasks/main.yml @@ -1,7 +1,13 @@ --- +- name: Deploy calico + include_tasks: calico.yml + tags: calico + when: calico_iface is defined + - name: Deploy metallb pool include_tasks: metallb.yml tags: metallb + when: kube_vip_lb_ip_range is not defined - name: Remove tmp directory used for manifests file: diff --git 
a/roles/k3s_server_post/tasks/metallb.yml b/roles/k3s_server_post/tasks/metallb.yml index 2421947..07a23b0 100644 --- a/roles/k3s_server_post/tasks/metallb.yml +++ b/roles/k3s_server_post/tasks/metallb.yml @@ -8,6 +8,27 @@ with_items: "{{ groups[group_name_master | default('master')] }}" run_once: true +- name: Delete outdated metallb replicas + shell: |- + set -o pipefail + + REPLICAS=$(k3s kubectl --namespace='metallb-system' get replicasets \ + -l 'component=controller,app=metallb' \ + -o jsonpath='{.items[0].spec.template.spec.containers[0].image}, {.items[0].metadata.name}' 2>/dev/null || true) + REPLICAS_SETS=$(echo ${REPLICAS} | grep -v '{{ metal_lb_controller_tag_version }}' | sed -e "s/^.*\s//g") + if [ -n "${REPLICAS_SETS}" ] ; then + for REPLICAS in "${REPLICAS_SETS}" + do + k3s kubectl --namespace='metallb-system' \ + delete rs "${REPLICAS}" + done + fi + args: + executable: /bin/bash + changed_when: false + run_once: true + with_items: "{{ groups[group_name_master | default('master')] }}" + - name: Copy metallb CRs manifest to first master template: src: "metallb.crs.j2" diff --git a/roles/k3s_server_post/templates/calico.crs.j2 b/roles/k3s_server_post/templates/calico.crs.j2 new file mode 100644 index 0000000..d33099d --- /dev/null +++ b/roles/k3s_server_post/templates/calico.crs.j2 @@ -0,0 +1,41 @@ +# This section includes base Calico installation configuration. +# For more information, see: https://docs.tigera.io/calico/latest/reference/installation/api#operator.tigera.io/v1.Installation +apiVersion: operator.tigera.io/v1 +kind: Installation +metadata: + name: default +spec: + # Configures Calico networking. + calicoNetwork: + # Note: The ipPools section cannot be modified post-install. + ipPools: + - blockSize: {{ calico_blockSize | default('26') }} + cidr: {{ calico_cidr | default('10.52.0.0/16') }} + encapsulation: {{ calico_encapsulation | default('VXLANCrossSubnet') }} + natOutgoing: {{ calico_natOutgoing | default('Enabled') }} + nodeSelector: {{ calico_nodeSelector | default('all()') }} + nodeAddressAutodetectionV4: + interface: {{ calico_iface }} + linuxDataplane: {{ 'BPF' if calico_ebpf else 'Iptables' }} + +--- + +# This section configures the Calico API server. 
+# For more information, see: https://docs.tigera.io/calico/latest/reference/installation/api#operator.tigera.io/v1.APIServer +apiVersion: operator.tigera.io/v1 +kind: APIServer +metadata: + name: default +spec: {} + +{% if calico_ebpf %} +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: kubernetes-services-endpoint + namespace: tigera-operator +data: + KUBERNETES_SERVICE_HOST: '{{ apiserver_endpoint }}' + KUBERNETES_SERVICE_PORT: '6443' +{% endif %} diff --git a/roles/prereq/tasks/main.yml b/roles/prereq/tasks/main.yml index 2fffe06..4dfca57 100644 --- a/roles/prereq/tasks/main.yml +++ b/roles/prereq/tasks/main.yml @@ -14,7 +14,7 @@ name: net.ipv4.ip_forward value: "1" state: present - reload: yes + reload: true tags: sysctl - name: Enable IPv6 forwarding @@ -22,7 +22,7 @@ name: net.ipv6.conf.all.forwarding value: "1" state: present - reload: yes + reload: true tags: sysctl - name: Enable IPv6 router advertisements @@ -30,7 +30,7 @@ name: net.ipv6.conf.all.accept_ra value: "2" state: present - reload: yes + reload: true tags: sysctl - name: Add br_netfilter to /etc/modules-load.d/ @@ -51,7 +51,7 @@ name: "{{ item }}" value: "1" state: present - reload: yes + reload: true when: ansible_os_family == "RedHat" loop: - net.bridge.bridge-nf-call-iptables diff --git a/roles/raspberrypi/tasks/main.yml b/roles/raspberrypi/tasks/main.yml index 29f824a..9b4c7d4 100644 --- a/roles/raspberrypi/tasks/main.yml +++ b/roles/raspberrypi/tasks/main.yml @@ -17,21 +17,27 @@ when: grep_cpuinfo_raspberrypi.rc == 0 or grep_device_tree_model_raspberrypi.rc == 0 -- name: Set detected_distribution to Raspbian +- name: Set detected_distribution to Raspbian (ARM64 on Raspbian, Debian Buster/Bullseye/Bookworm) set_fact: detected_distribution: Raspbian - when: > - raspberry_pi|default(false) and - ( ansible_facts.lsb.id|default("") == "Raspbian" or - ansible_facts.lsb.description|default("") is match("[Rr]aspbian.*") ) + vars: + allowed_descriptions: + - "[Rr]aspbian.*" + - "Debian.*buster" + - "Debian.*bullseye" + - "Debian.*bookworm" + when: + - ansible_facts.architecture is search("aarch64") + - raspberry_pi|default(false) + - ansible_facts.lsb.description|default("") is match(allowed_descriptions | join('|')) -- name: Set detected_distribution to Raspbian (ARM64 on Debian Buster) +- name: Set detected_distribution to Raspbian (ARM64 on Debian Bookworm) set_fact: detected_distribution: Raspbian when: - ansible_facts.architecture is search("aarch64") - raspberry_pi|default(false) - - ansible_facts.lsb.description|default("") is match("Debian.*buster") + - ansible_facts.lsb.description|default("") is match("Debian.*bookworm") - name: Set detected_distribution_major_version set_fact: @@ -39,14 +45,6 @@ when: - detected_distribution | default("") == "Raspbian" -- name: Set detected_distribution to Raspbian (ARM64 on Debian Bullseye) - set_fact: - detected_distribution: Raspbian - when: - - ansible_facts.architecture is search("aarch64") - - raspberry_pi|default(false) - - ansible_facts.lsb.description|default("") is match("Debian.*bullseye") - - name: Execute OS related tasks on the Raspberry Pi - {{ action_ }} include_tasks: "{{ item }}" with_first_found: diff --git a/roles/raspberrypi/tasks/setup/Rocky.yml b/roles/raspberrypi/tasks/setup/Rocky.yml index b037b1d..7fd8a38 100644 --- a/roles/raspberrypi/tasks/setup/Rocky.yml +++ b/roles/raspberrypi/tasks/setup/Rocky.yml @@ -2,7 +2,7 @@ - name: Enable cgroup via boot commandline if not already enabled for Rocky lineinfile: path: /boot/cmdline.txt - backrefs: yes + 
backrefs: true regexp: '^((?!.*\bcgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory\b).*)$' line: '\1 cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory' notify: reboot diff --git a/roles/raspberrypi/tasks/setup/Ubuntu.yml b/roles/raspberrypi/tasks/setup/Ubuntu.yml index 6b1e731..72c77c9 100644 --- a/roles/raspberrypi/tasks/setup/Ubuntu.yml +++ b/roles/raspberrypi/tasks/setup/Ubuntu.yml @@ -2,7 +2,7 @@ - name: Enable cgroup via boot commandline if not already enabled for Ubuntu on a Raspberry Pi lineinfile: path: /boot/firmware/cmdline.txt - backrefs: yes + backrefs: true regexp: '^((?!.*\bcgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory\b).*)$' line: '\1 cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory' notify: reboot diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml index d75c9ff..3e90d4d 100644 --- a/roles/reset/tasks/main.yml +++ b/roles/reset/tasks/main.yml @@ -3,7 +3,7 @@ systemd: name: "{{ item }}" state: stopped - enabled: no + enabled: false failed_when: false with_items: - k3s @@ -45,19 +45,22 @@ - /var/lib/rancher/k3s - /var/lib/rancher/ - /var/lib/cni/ + - /etc/cni/net.d - name: Remove K3s http_proxy files file: name: "{{ item }}" state: absent with_items: + - "{{ systemd_dir }}/k3s.service.d/http_proxy.conf" - "{{ systemd_dir }}/k3s.service.d" + - "{{ systemd_dir }}/k3s-node.service.d/http_proxy.conf" - "{{ systemd_dir }}/k3s-node.service.d" when: proxy_env is defined - name: Reload daemon_reload systemd: - daemon_reload: yes + daemon_reload: true - name: Remove tmp directory used for manifests file: diff --git a/site.yml b/site.yml index 6dde6b1..e57deab 100644 --- a/site.yml +++ b/site.yml @@ -2,7 +2,7 @@ - name: Prepare Proxmox cluster hosts: proxmox gather_facts: true - become: yes + become: true environment: "{{ proxy_env | default({}) }}" roles: - role: proxmox_lxc @@ -10,7 +10,7 @@ - name: Prepare k3s nodes hosts: k3s_cluster - gather_facts: yes + gather_facts: true environment: "{{ proxy_env | default({}) }}" roles: - role: lxc @@ -46,3 +46,14 @@ roles: - role: k3s_server_post become: true + +- name: Storing kubeconfig in the playbook directory + hosts: master + environment: "{{ proxy_env | default({}) }}" + tasks: + - name: Copying kubeconfig from {{ hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname'] }} + ansible.builtin.fetch: + src: "{{ ansible_user_dir }}/.kube/config" + dest: ./kubeconfig + flat: true + when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname']