From 0f23e7e25882e7d7455b49896f867971c7a5f1b3 Mon Sep 17 00:00:00 2001 From: sholdee <102821812+sholdee@users.noreply.github.com> Date: Fri, 26 Jan 2024 18:53:27 -0600 Subject: [PATCH] Add Calico CNI option (#414) * Add Tigera Operator/Calico CNI option Small tweak to reduce delta from head Set calico option to be disabled by default Add rescue blocks in case updating existing Refactor items and update comments Refactor and consolidate calico.yml into block Refactor to use template for Calico CRs Revert use_calico to false Template blockSize Align default cidr in template with all.yml sample Apply upstream version tags Revert to current ver tags. Upstream's don't work. Update template address detection Add Tigera Operator/Calico CNI option * Add calico-apiserver check * Add eBPF dataplane option * Add kube svc endpoint configmap when ebpf enabled * Add /etc/cni/net.d to reset task * Refactor based on comments * Add molecule scenario * Fix lint --------- Co-authored-by: Techno Tim --- .github/ISSUE_TEMPLATE.md | 5 + .github/workflows/test.yml | 1 + inventory/sample/group_vars/all.yml | 17 ++- molecule/README.md | 2 + molecule/calico/molecule.yml | 49 ++++++++ molecule/calico/overrides.yml | 16 +++ roles/k3s_server_post/tasks/calico.yml | 114 ++++++++++++++++++ roles/k3s_server_post/tasks/main.yml | 5 + roles/k3s_server_post/templates/calico.crs.j2 | 41 +++++++ roles/reset/tasks/main.yml | 1 + 10 files changed, 249 insertions(+), 2 deletions(-) create mode 100644 molecule/calico/molecule.yml create mode 100644 molecule/calico/overrides.yml create mode 100644 roles/k3s_server_post/tasks/calico.yml create mode 100644 roles/k3s_server_post/templates/calico.crs.j2 diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 986c9cb..d9553de 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -37,6 +37,11 @@ systemd_dir: "" flannel_iface: "" +#calico_iface: "" +calico_ebpf: "" +calico_cidr: "" +calico_tag: "" + apiserver_endpoint: "" k3s_token: "NA" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a9b436..b8a6362 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,6 +12,7 @@ jobs: - default - ipv6 - single_node + - calico fail-fast: false env: PYTHON_VERSION: "3.11" diff --git a/inventory/sample/group_vars/all.yml b/inventory/sample/group_vars/all.yml index 216e062..4f433af 100644 --- a/inventory/sample/group_vars/all.yml +++ b/inventory/sample/group_vars/all.yml @@ -10,6 +10,12 @@ system_timezone: "Your/Timezone" # interface which will be used for flannel flannel_iface: "eth0" +# uncomment calico_iface to use tigera operator/calico cni instead of flannel https://docs.tigera.io/calico/latest/about +# calico_iface: "eth0" +calico_ebpf: false # use eBPF dataplane instead of iptables +calico_cidr: "10.52.0.0/16" # calico cluster pod cidr pool +calico_tag: "v3.27.0" # calico version tag + # apiserver_endpoint is virtual ip-address which will be configured on each master apiserver_endpoint: "192.168.30.222" @@ -20,23 +26,30 @@ k3s_token: "some-SUPER-DEDEUPER-secret-password" # The IP on which the node is reachable in the cluster. # Here, a sensible default is provided, you can still override # it for each of your hosts, though. -k3s_node_ip: '{{ ansible_facts[flannel_iface]["ipv4"]["address"] }}' +k3s_node_ip: "{{ ansible_facts[(calico_iface | default(flannel_iface))]['ipv4']['address'] }}" # Disable the taint manually by setting: k3s_master_taint = false k3s_master_taint: "{{ true if groups['node'] | default([]) | length >= 1 else false }}" # these arguments are recommended for servers as well as agents: extra_args: >- - --flannel-iface={{ flannel_iface }} + {{ '--flannel-iface=' + flannel_iface if calico_iface is not defined else '' }} --node-ip={{ k3s_node_ip }} # change these to your liking, the only required are: --disable servicelb, --tls-san {{ apiserver_endpoint }} +# the contents of the if block is also required if using calico extra_server_args: >- {{ extra_args }} {{ '--node-taint node-role.kubernetes.io/master=true:NoSchedule' if k3s_master_taint else '' }} + {% if calico_iface is defined %} + --flannel-backend=none + --disable-network-policy + --cluster-cidr={{ calico_cidr | default('10.52.0.0/16') }} + {% endif %} --tls-san {{ apiserver_endpoint }} --disable servicelb --disable traefik + extra_agent_args: >- {{ extra_args }} diff --git a/molecule/README.md b/molecule/README.md index 25cfdad..40c2fee 100644 --- a/molecule/README.md +++ b/molecule/README.md @@ -13,6 +13,8 @@ We have these scenarios: To save a bit of test time, this cluster is _not_ highly available, it consists of only one control and one worker node. - **single_node**: Very similar to the default scenario, but uses only a single node for all cluster functionality. +- **calico**: + The same as single node, but uses calico cni instead of flannel. ## How to execute diff --git a/molecule/calico/molecule.yml b/molecule/calico/molecule.yml new file mode 100644 index 0000000..f9487f3 --- /dev/null +++ b/molecule/calico/molecule.yml @@ -0,0 +1,49 @@ +--- +dependency: + name: galaxy +driver: + name: vagrant +platforms: + - name: control1 + box: generic/ubuntu2204 + memory: 4096 + cpus: 4 + config_options: + # We currently can not use public-key based authentication on Ubuntu 22.04, + # see: https://github.com/chef/bento/issues/1405 + ssh.username: "vagrant" + ssh.password: "vagrant" + groups: + - k3s_cluster + - master + interfaces: + - network_name: private_network + ip: 192.168.30.62 +provisioner: + name: ansible + env: + ANSIBLE_VERBOSITY: 1 + playbooks: + converge: ../resources/converge.yml + side_effect: ../resources/reset.yml + verify: ../resources/verify.yml + inventory: + links: + group_vars: ../../inventory/sample/group_vars +scenario: + test_sequence: + - dependency + - cleanup + - destroy + - syntax + - create + - prepare + - converge + # idempotence is not possible with the playbook in its current form. + - verify + # We are repurposing side_effect here to test the reset playbook. + # This is why we do not run it before verify (which tests the cluster), + # but after the verify step. + - side_effect + - cleanup + - destroy diff --git a/molecule/calico/overrides.yml b/molecule/calico/overrides.yml new file mode 100644 index 0000000..8a78c51 --- /dev/null +++ b/molecule/calico/overrides.yml @@ -0,0 +1,16 @@ +--- +- name: Apply overrides + hosts: all + tasks: + - name: Override host variables + ansible.builtin.set_fact: + # See: + # https://github.com/flannel-io/flannel/blob/67d603aaf45ef80f5dd39f43714fc5e6f8a637eb/Documentation/troubleshooting.md#Vagrant + calico_iface: eth1 + + # The test VMs might be a bit slow, so we give them more time to join the cluster: + retry_count: 45 + + # Make sure that our IP ranges do not collide with those of the other scenarios + apiserver_endpoint: "192.168.30.224" + metal_lb_ip_range: "192.168.30.100-192.168.30.109" diff --git a/roles/k3s_server_post/tasks/calico.yml b/roles/k3s_server_post/tasks/calico.yml new file mode 100644 index 0000000..bf8b1d5 --- /dev/null +++ b/roles/k3s_server_post/tasks/calico.yml @@ -0,0 +1,114 @@ +--- +- name: Deploy Calico to cluster + when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname'] + run_once: true + block: + - name: Create manifests directory on first master + file: + path: /tmp/k3s + state: directory + owner: root + group: root + mode: 0755 + + - name: "Download to first master: manifest for Tigera Operator and Calico CRDs" + ansible.builtin.get_url: + url: "https://raw.githubusercontent.com/projectcalico/calico/{{ calico_tag }}/manifests/tigera-operator.yaml" + dest: "/tmp/k3s/tigera-operator.yaml" + owner: root + group: root + mode: 0755 + + - name: Copy Calico custom resources manifest to first master + ansible.builtin.template: + src: "calico.crs.j2" + dest: /tmp/k3s/custom-resources.yaml + owner: root + group: root + mode: 0755 + + - name: Deploy or replace Tigera Operator + block: + - name: Deploy Tigera Operator + ansible.builtin.command: + cmd: kubectl create -f /tmp/k3s/tigera-operator.yaml + register: create_operator + changed_when: "'created' in create_operator.stdout" + failed_when: "'Error' in create_operator.stderr and 'already exists' not in create_operator.stderr" + rescue: + - name: Replace existing Tigera Operator + ansible.builtin.command: + cmd: kubectl replace -f /tmp/k3s/tigera-operator.yaml + register: replace_operator + changed_when: "'replaced' in replace_operator.stdout" + failed_when: "'Error' in replace_operator.stderr" + + - name: Wait for Tigera Operator resources + command: >- + k3s kubectl wait {{ item.type }}/{{ item.name }} + --namespace='tigera-operator' + --for=condition=Available=True + --timeout=7s + register: tigera_result + changed_when: false + until: tigera_result is succeeded + retries: 7 + delay: 7 + with_items: + - {name: tigera-operator, type: deployment} + loop_control: + label: "{{ item.type }}/{{ item.name }}" + + - name: Deploy Calico custom resources + block: + - name: Deploy custom resources for Calico + ansible.builtin.command: + cmd: kubectl create -f /tmp/k3s/custom-resources.yaml + register: create_cr + changed_when: "'created' in create_cr.stdout" + failed_when: "'Error' in create_cr.stderr and 'already exists' not in create_cr.stderr" + rescue: + - name: Apply new Calico custom resource manifest + ansible.builtin.command: + cmd: kubectl apply -f /tmp/k3s/custom-resources.yaml + register: apply_cr + changed_when: "'configured' in apply_cr.stdout or 'created' in apply_cr.stdout" + failed_when: "'Error' in apply_cr.stderr" + + - name: Wait for Calico system resources to be available + command: >- + {% if item.type == 'daemonset' %} + k3s kubectl wait pods + --namespace='{{ item.namespace }}' + --selector={{ item.selector }} + --for=condition=Ready + {% else %} + k3s kubectl wait {{ item.type }}/{{ item.name }} + --namespace='{{ item.namespace }}' + --for=condition=Available + {% endif %} + --timeout=7s + register: cr_result + changed_when: false + until: cr_result is succeeded + retries: 30 + delay: 7 + with_items: + - {name: calico-typha, type: deployment, namespace: calico-system} + - {name: calico-kube-controllers, type: deployment, namespace: calico-system} + - {name: csi-node-driver, type: daemonset, selector: 'k8s-app=csi-node-driver', namespace: calico-system} + - {name: calico-node, type: daemonset, selector: 'k8s-app=calico-node', namespace: calico-system} + - {name: calico-apiserver, type: deployment, namespace: calico-apiserver} + loop_control: + label: "{{ item.type }}/{{ item.name }}" + + - name: Patch Felix configuration for eBPF mode + ansible.builtin.command: + cmd: > + kubectl patch felixconfiguration default + --type='merge' + --patch='{"spec": {"bpfKubeProxyIptablesCleanupEnabled": false}}' + register: patch_result + changed_when: "'felixconfiguration.projectcalico.org/default patched' in patch_result.stdout" + failed_when: "'Error' in patch_result.stderr" + when: calico_ebpf diff --git a/roles/k3s_server_post/tasks/main.yml b/roles/k3s_server_post/tasks/main.yml index f88dc08..505eebf 100644 --- a/roles/k3s_server_post/tasks/main.yml +++ b/roles/k3s_server_post/tasks/main.yml @@ -1,4 +1,9 @@ --- +- name: Deploy calico + include_tasks: calico.yml + tags: calico + when: calico_iface is defined + - name: Deploy metallb pool include_tasks: metallb.yml tags: metallb diff --git a/roles/k3s_server_post/templates/calico.crs.j2 b/roles/k3s_server_post/templates/calico.crs.j2 new file mode 100644 index 0000000..d33099d --- /dev/null +++ b/roles/k3s_server_post/templates/calico.crs.j2 @@ -0,0 +1,41 @@ +# This section includes base Calico installation configuration. +# For more information, see: https://docs.tigera.io/calico/latest/reference/installation/api#operator.tigera.io/v1.Installation +apiVersion: operator.tigera.io/v1 +kind: Installation +metadata: + name: default +spec: + # Configures Calico networking. + calicoNetwork: + # Note: The ipPools section cannot be modified post-install. + ipPools: + - blockSize: {{ calico_blockSize | default('26') }} + cidr: {{ calico_cidr | default('10.52.0.0/16') }} + encapsulation: {{ calico_encapsulation | default('VXLANCrossSubnet') }} + natOutgoing: {{ calico_natOutgoing | default('Enabled') }} + nodeSelector: {{ calico_nodeSelector | default('all()') }} + nodeAddressAutodetectionV4: + interface: {{ calico_iface }} + linuxDataplane: {{ 'BPF' if calico_ebpf else 'Iptables' }} + +--- + +# This section configures the Calico API server. +# For more information, see: https://docs.tigera.io/calico/latest/reference/installation/api#operator.tigera.io/v1.APIServer +apiVersion: operator.tigera.io/v1 +kind: APIServer +metadata: + name: default +spec: {} + +{% if calico_ebpf %} +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: kubernetes-services-endpoint + namespace: tigera-operator +data: + KUBERNETES_SERVICE_HOST: '{{ apiserver_endpoint }}' + KUBERNETES_SERVICE_PORT: '6443' +{% endif %} diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml index 49a4aa0..3e90d4d 100644 --- a/roles/reset/tasks/main.yml +++ b/roles/reset/tasks/main.yml @@ -45,6 +45,7 @@ - /var/lib/rancher/k3s - /var/lib/rancher/ - /var/lib/cni/ + - /etc/cni/net.d - name: Remove K3s http_proxy files file: