diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 986c9cb..d9553de 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -37,6 +37,11 @@ systemd_dir: ""
 
 flannel_iface: ""
 
+#calico_iface: ""
+calico_ebpf: ""
+calico_cidr: ""
+calico_tag: ""
+
 apiserver_endpoint: ""
 
 k3s_token: "NA"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4a9b436..b8a6362 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -12,6 +12,7 @@ jobs:
           - default
           - ipv6
           - single_node
+          - calico
       fail-fast: false
     env:
       PYTHON_VERSION: "3.11"
diff --git a/inventory/sample/group_vars/all.yml b/inventory/sample/group_vars/all.yml
index 216e062..4f433af 100644
--- a/inventory/sample/group_vars/all.yml
+++ b/inventory/sample/group_vars/all.yml
@@ -10,6 +10,12 @@ system_timezone: "Your/Timezone"
 # interface which will be used for flannel
 flannel_iface: "eth0"
 
+# uncomment calico_iface to use the Tigera Operator/Calico CNI instead of flannel: https://docs.tigera.io/calico/latest/about
+# calico_iface: "eth0"
+calico_ebpf: false  # use the eBPF dataplane instead of iptables
+calico_cidr: "10.52.0.0/16"  # Calico cluster pod CIDR pool
+calico_tag: "v3.27.0"  # Calico version tag
+
 # apiserver_endpoint is virtual ip-address which will be configured on each master
 apiserver_endpoint: "192.168.30.222"
@@ -20,23 +26,30 @@ k3s_token: "some-SUPER-DEDEUPER-secret-password"
 # The IP on which the node is reachable in the cluster.
 # Here, a sensible default is provided, you can still override
 # it for each of your hosts, though.
-k3s_node_ip: '{{ ansible_facts[flannel_iface]["ipv4"]["address"] }}'
+k3s_node_ip: "{{ ansible_facts[(calico_iface | default(flannel_iface))]['ipv4']['address'] }}"
 
 # Disable the taint manually by setting: k3s_master_taint = false
 k3s_master_taint: "{{ true if groups['node'] | default([]) | length >= 1 else false }}"
 
 # these arguments are recommended for servers as well as agents:
 extra_args: >-
-  --flannel-iface={{ flannel_iface }}
+  {{ '--flannel-iface=' + flannel_iface if calico_iface is not defined else '' }}
   --node-ip={{ k3s_node_ip }}
 
 # change these to your liking, the only required are: --disable servicelb, --tls-san {{ apiserver_endpoint }}
+# the contents of the if block are also required when using Calico
 extra_server_args: >-
   {{ extra_args }}
   {{ '--node-taint node-role.kubernetes.io/master=true:NoSchedule' if k3s_master_taint else '' }}
+  {% if calico_iface is defined %}
+  --flannel-backend=none
+  --disable-network-policy
+  --cluster-cidr={{ calico_cidr | default('10.52.0.0/16') }}
+  {% endif %}
   --tls-san {{ apiserver_endpoint }}
   --disable servicelb
   --disable traefik
+
 extra_agent_args: >-
   {{ extra_args }}
diff --git a/molecule/README.md b/molecule/README.md
index 25cfdad..40c2fee 100644
--- a/molecule/README.md
+++ b/molecule/README.md
@@ -13,6 +13,8 @@ We have these scenarios:
   To save a bit of test time, this cluster is _not_ highly available, it consists of only one control and one worker node.
 - **single_node**:
   Very similar to the default scenario, but uses only a single node for all cluster functionality.
+- **calico**:
+  The same as the single_node scenario, but uses the Calico CNI instead of flannel.
 
 ## How to execute
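
For illustration, with calico_iface set and the sample defaults above, the extra_server_args template renders to roughly the following k3s server flags. This is a sketch: the node IP shown is a made-up example value, and the exact whitespace depends on how Jinja2 folds the block.

    --node-ip=192.168.30.38
    --node-taint node-role.kubernetes.io/master=true:NoSchedule
    --flannel-backend=none
    --disable-network-policy
    --cluster-cidr=10.52.0.0/16
    --tls-san 192.168.30.222
    --disable servicelb
    --disable traefik

Note that --flannel-iface is absent here: the first line of extra_args renders to an empty string whenever calico_iface is defined.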
diff --git a/molecule/calico/molecule.yml b/molecule/calico/molecule.yml
new file mode 100644
index 0000000..f9487f3
--- /dev/null
+++ b/molecule/calico/molecule.yml
@@ -0,0 +1,49 @@
+---
+dependency:
+  name: galaxy
+driver:
+  name: vagrant
+platforms:
+  - name: control1
+    box: generic/ubuntu2204
+    memory: 4096
+    cpus: 4
+    config_options:
+      # We currently cannot use public-key based authentication on Ubuntu 22.04,
+      # see: https://github.com/chef/bento/issues/1405
+      ssh.username: "vagrant"
+      ssh.password: "vagrant"
+    groups:
+      - k3s_cluster
+      - master
+    interfaces:
+      - network_name: private_network
+        ip: 192.168.30.62
+provisioner:
+  name: ansible
+  env:
+    ANSIBLE_VERBOSITY: 1
+  playbooks:
+    converge: ../resources/converge.yml
+    side_effect: ../resources/reset.yml
+    verify: ../resources/verify.yml
+  inventory:
+    links:
+      group_vars: ../../inventory/sample/group_vars
+scenario:
+  test_sequence:
+    - dependency
+    - cleanup
+    - destroy
+    - syntax
+    - create
+    - prepare
+    - converge
+    # idempotence is not possible with the playbook in its current form.
+    - verify
+    # We are repurposing side_effect here to test the reset playbook.
+    # This is why we do not run it before verify (which tests the cluster),
+    # but after the verify step.
+    - side_effect
+    - cleanup
+    - destroy
diff --git a/molecule/calico/overrides.yml b/molecule/calico/overrides.yml
new file mode 100644
index 0000000..8a78c51
--- /dev/null
+++ b/molecule/calico/overrides.yml
@@ -0,0 +1,16 @@
+---
+- name: Apply overrides
+  hosts: all
+  tasks:
+    - name: Override host variables
+      ansible.builtin.set_fact:
+        # See:
+        # https://github.com/flannel-io/flannel/blob/67d603aaf45ef80f5dd39f43714fc5e6f8a637eb/Documentation/troubleshooting.md#Vagrant
+        calico_iface: eth1
+
+        # The test VMs might be a bit slow, so we give them more time to join the cluster:
+        retry_count: 45
+
+        # Make sure that our IP ranges do not collide with those of the other scenarios
+        apiserver_endpoint: "192.168.30.224"
+        metal_lb_ip_range: "192.168.30.100-192.168.30.109"
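
A minimal sketch of how to exercise the new scenario locally, assuming molecule and its vagrant driver are installed as for the existing scenarios:

    # runs the full sequence defined above: create, converge, verify, side_effect (reset), destroy
    molecule test --scenario-name calico

The override of calico_iface to eth1 is needed because Vagrant attaches its NAT management network as the first interface; the private_network declared in molecule.yml typically comes up as eth1, mirroring what the other scenarios do for flannel_iface (see the linked flannel troubleshooting doc).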
diff --git a/roles/k3s_server_post/tasks/calico.yml b/roles/k3s_server_post/tasks/calico.yml
new file mode 100644
index 0000000..bf8b1d5
--- /dev/null
+++ b/roles/k3s_server_post/tasks/calico.yml
@@ -0,0 +1,114 @@
+---
+- name: Deploy Calico to cluster
+  when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname']
+  run_once: true
+  block:
+    - name: Create manifests directory on first master
+      ansible.builtin.file:
+        path: /tmp/k3s
+        state: directory
+        owner: root
+        group: root
+        mode: 0755
+
+    - name: "Download to first master: manifest for Tigera Operator and Calico CRDs"
+      ansible.builtin.get_url:
+        url: "https://raw.githubusercontent.com/projectcalico/calico/{{ calico_tag }}/manifests/tigera-operator.yaml"
+        dest: "/tmp/k3s/tigera-operator.yaml"
+        owner: root
+        group: root
+        mode: 0755
+
+    - name: Copy Calico custom resources manifest to first master
+      ansible.builtin.template:
+        src: "calico.crs.j2"
+        dest: /tmp/k3s/custom-resources.yaml
+        owner: root
+        group: root
+        mode: 0755
+
+    - name: Deploy or replace Tigera Operator
+      block:
+        - name: Deploy Tigera Operator
+          ansible.builtin.command:
+            cmd: kubectl create -f /tmp/k3s/tigera-operator.yaml
+          register: create_operator
+          changed_when: "'created' in create_operator.stdout"
+          failed_when: "'Error' in create_operator.stderr and 'already exists' not in create_operator.stderr"
+      rescue:
+        - name: Replace existing Tigera Operator
+          ansible.builtin.command:
+            cmd: kubectl replace -f /tmp/k3s/tigera-operator.yaml
+          register: replace_operator
+          changed_when: "'replaced' in replace_operator.stdout"
+          failed_when: "'Error' in replace_operator.stderr"
+
+    - name: Wait for Tigera Operator resources
+      ansible.builtin.command: >-
+        k3s kubectl wait {{ item.type }}/{{ item.name }}
+        --namespace='tigera-operator'
+        --for=condition=Available=True
+        --timeout=7s
+      register: tigera_result
+      changed_when: false
+      until: tigera_result is succeeded
+      retries: 7
+      delay: 7
+      with_items:
+        - {name: tigera-operator, type: deployment}
+      loop_control:
+        label: "{{ item.type }}/{{ item.name }}"
+
+    - name: Deploy Calico custom resources
+      block:
+        - name: Deploy custom resources for Calico
+          ansible.builtin.command:
+            cmd: kubectl create -f /tmp/k3s/custom-resources.yaml
+          register: create_cr
+          changed_when: "'created' in create_cr.stdout"
+          failed_when: "'Error' in create_cr.stderr and 'already exists' not in create_cr.stderr"
+      rescue:
+        - name: Apply new Calico custom resource manifest
+          ansible.builtin.command:
+            cmd: kubectl apply -f /tmp/k3s/custom-resources.yaml
+          register: apply_cr
+          changed_when: "'configured' in apply_cr.stdout or 'created' in apply_cr.stdout"
+          failed_when: "'Error' in apply_cr.stderr"
+
+    - name: Wait for Calico system resources to be available
+      ansible.builtin.command: >-
+        {% if item.type == 'daemonset' %}
+        k3s kubectl wait pods
+        --namespace='{{ item.namespace }}'
+        --selector={{ item.selector }}
+        --for=condition=Ready
+        {% else %}
+        k3s kubectl wait {{ item.type }}/{{ item.name }}
+        --namespace='{{ item.namespace }}'
+        --for=condition=Available
+        {% endif %}
+        --timeout=7s
+      register: cr_result
+      changed_when: false
+      until: cr_result is succeeded
+      retries: 30
+      delay: 7
+      with_items:
+        - {name: calico-typha, type: deployment, namespace: calico-system}
+        - {name: calico-kube-controllers, type: deployment, namespace: calico-system}
+        - {name: csi-node-driver, type: daemonset, selector: 'k8s-app=csi-node-driver', namespace: calico-system}
+        - {name: calico-node, type: daemonset, selector: 'k8s-app=calico-node', namespace: calico-system}
+        - {name: calico-apiserver, type: deployment, namespace: calico-apiserver}
+      loop_control:
+        label: "{{ item.type }}/{{ item.name }}"
+
+    - name: Patch Felix configuration for eBPF mode
+      ansible.builtin.command:
+        cmd: >
+          kubectl patch felixconfiguration default
+          --type='merge'
+          --patch='{"spec": {"bpfKubeProxyIptablesCleanupEnabled": false}}'
+      register: patch_result
+      changed_when: "'felixconfiguration.projectcalico.org/default patched' in patch_result.stdout"
+      failed_when: "'Error' in patch_result.stderr"
+      when: calico_ebpf
diff --git a/roles/k3s_server_post/tasks/main.yml b/roles/k3s_server_post/tasks/main.yml
index f88dc08..505eebf 100644
--- a/roles/k3s_server_post/tasks/main.yml
+++ b/roles/k3s_server_post/tasks/main.yml
@@ -1,4 +1,9 @@
 ---
+- name: Deploy calico
+  include_tasks: calico.yml
+  tags: calico
+  when: calico_iface is defined
+
 - name: Deploy metallb pool
   include_tasks: metallb.yml
   tags: metallb
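
Once these tasks have completed, the deployment can be sanity-checked from the first master. A sketch (tigerastatus is the aggregate status resource maintained by the Tigera Operator):

    k3s kubectl get tigerastatus
    k3s kubectl get pods --namespace calico-system
    k3s kubectl get pods --namespace calico-apiserver

These mirror the Available/Ready conditions that the wait tasks above poll for.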
diff --git a/roles/k3s_server_post/templates/calico.crs.j2 b/roles/k3s_server_post/templates/calico.crs.j2
new file mode 100644
index 0000000..d33099d
--- /dev/null
+++ b/roles/k3s_server_post/templates/calico.crs.j2
@@ -0,0 +1,41 @@
+# This section includes base Calico installation configuration.
+# For more information, see: https://docs.tigera.io/calico/latest/reference/installation/api#operator.tigera.io/v1.Installation
+apiVersion: operator.tigera.io/v1
+kind: Installation
+metadata:
+  name: default
+spec:
+  # Configures Calico networking.
+  calicoNetwork:
+    # Note: The ipPools section cannot be modified post-install.
+    ipPools:
+      - blockSize: {{ calico_blockSize | default('26') }}
+        cidr: {{ calico_cidr | default('10.52.0.0/16') }}
+        encapsulation: {{ calico_encapsulation | default('VXLANCrossSubnet') }}
+        natOutgoing: {{ calico_natOutgoing | default('Enabled') }}
+        nodeSelector: {{ calico_nodeSelector | default('all()') }}
+    nodeAddressAutodetectionV4:
+      interface: {{ calico_iface }}
+    linuxDataplane: {{ 'BPF' if calico_ebpf else 'Iptables' }}
+
+---
+
+# This section configures the Calico API server.
+# For more information, see: https://docs.tigera.io/calico/latest/reference/installation/api#operator.tigera.io/v1.APIServer
+apiVersion: operator.tigera.io/v1
+kind: APIServer
+metadata:
+  name: default
+spec: {}
+
+{% if calico_ebpf %}
+---
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: kubernetes-services-endpoint
+  namespace: tigera-operator
+data:
+  KUBERNETES_SERVICE_HOST: '{{ apiserver_endpoint }}'
+  KUBERNETES_SERVICE_PORT: '6443'
+{% endif %}
diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml
index 49a4aa0..3e90d4d 100644
--- a/roles/reset/tasks/main.yml
+++ b/roles/reset/tasks/main.yml
@@ -45,6 +45,7 @@
     - /var/lib/rancher/k3s
     - /var/lib/rancher/
     - /var/lib/cni/
+    - /etc/cni/net.d
 
 - name: Remove K3s http_proxy files
   file:
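
For reference, with the sample defaults (calico_iface: eth0, calico_ebpf: false) the Installation section of calico.crs.j2 renders to roughly the manifest below. This is a sketch of the template output, not captured from a live cluster:

    apiVersion: operator.tigera.io/v1
    kind: Installation
    metadata:
      name: default
    spec:
      calicoNetwork:
        ipPools:
          - blockSize: 26
            cidr: 10.52.0.0/16
            encapsulation: VXLANCrossSubnet
            natOutgoing: Enabled
            nodeSelector: all()
        nodeAddressAutodetectionV4:
          interface: eth0
        linuxDataplane: Iptables

The /etc/cni/net.d entry added to the reset role complements this: Calico writes its CNI configuration there (flannel's lives under k3s's own data directory), so a reset that skipped it could leave stale CNI config behind on the node.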