Skip to content

Commit

Permalink
Add strict TLS mode support (rancher#2507)
Browse files Browse the repository at this point in the history
* Add agentTLSMode option

Fleet now supports two distinct TLS modes for its agent when registering
against an upstream cluster:
* `system-store`, the default, does not change its current behaviour:
  the Fleet agent trusts any certificate signed by a CA found in its
  system store. In this mode, Fleet will also ignore a configured CA,
  if the system trust store is sufficient.
* `strict`, to bypass the system store when validating a certificate.

* Redeploy Fleet agent when TLS mode setting changes

This commit takes care of watching the agent TLS mode setting in the
`fleet-controller` config map, and of redeploying the Fleet agent to
upstream and downstream clusters when that setting changes.
Note that this only works for downstream clusters registered through a
manager-initiated process [1].

Testing this is done by reusing existing agent TLS mode test cases, and
triggering new deployments of the Fleet agent by patching the
`fleet-controller` config map.
Requirements for this include a cluster registered in manager-initiated
mode, while existing multi-cluster end-to-end tests need a downstream
cluster registered in agent-initiated mode.
Therefore, this commit also adds a new downstream cluster to the
multi-cluster CI workflow, which is so far only used for agent TLS mode
tests.

[1]: https://fleet.rancher.io/cluster-registration#manager-initiated
  • Loading branch information
weyfonk authored Jun 26, 2024
1 parent c220a0d commit de3f46a
Show file tree
Hide file tree
Showing 16 changed files with 353 additions and 30 deletions.
6 changes: 6 additions & 0 deletions .github/scripts/deploy-fleet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ else
agentTag="dev"
fi

host=$(kubectl get node k3d-upstream-server-0 -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')
ca=$( kubectl config view --flatten -o jsonpath='{.clusters[?(@.name == "k3d-upstream")].cluster.certificate-authority-data}' | base64 -d )
server="https://$host:6443"

eventually helm upgrade --install fleet-crd charts/fleet-crd \
--atomic \
-n cattle-fleet-system \
Expand All @@ -37,6 +41,8 @@ eventually helm upgrade --install fleet charts/fleet \
--set agentImage.repository="$agentRepo" \
--set agentImage.tag="$agentTag" \
--set agentImage.imagePullPolicy=IfNotPresent \
--set apiServerCA="$ca" \
--set apiServerURL="$server" \
--set shards="{$shards}" \
--set debug=true --set debugLevel=1

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/e2e-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ jobs:
# k3d will automatically create a network named k3d-test-cluster-1 with the range 172.18.0.0/16
with:
k3d-version: ${{ env.SETUP_K3D_VERSION }}
cluster-name: "k3s-default"
cluster-name: "upstream"
args: >-
--agents 1
--network "nw01"
--image docker.io/rancher/k3s:${{matrix.k3s_version}}
-
name: Import Images Into k3d
run: |
./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev nginx-git:test
./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev nginx-git:test -c upstream
-
name: Set Up Tmate Debug Session
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.enable_tmate == 'true' }}
Expand Down
79 changes: 78 additions & 1 deletion .github/workflows/e2e-multicluster-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
--agents 1
--network "nw01"
-
name: Provision k3d Downstream Cluster
name: Provision k3d Downstream Cluster for agent-initiated registration
uses: AbsaOSS/k3d-action@v2
with:
k3d-version: ${{ env.SETUP_K3D_VERSION }}
Expand All @@ -69,11 +69,24 @@ jobs:
--api-port 6644
--agents 1
--network "nw01"
-
name: Provision k3d Downstream Cluster for manager-initiated registration
uses: AbsaOSS/k3d-action@v2
with:
k3d-version: ${{ env.SETUP_K3D_VERSION }}
cluster-name: "managed-downstream"
args: >-
-p "82:80@agent:0:direct"
-p "445:443@agent:0:direct"
--api-port 6645
--agents 1
--network "nw01"
-
name: Import Images Into k3d
run: |
./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev -c upstream
./.github/scripts/k3d-import-retry.sh rancher/fleet-agent:dev -c downstream
./.github/scripts/k3d-import-retry.sh rancher/fleet-agent:dev -c managed-downstream
-
name: Set Up Tmate Debug Session
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.enable_tmate == 'true' }}
Expand Down Expand Up @@ -127,6 +140,61 @@ jobs:
while [ $(kubectl -n fleet-default get cluster -o jsonpath='{.items[0].status.summary.ready}') -ne 1 ]; do
sleep 1
done
-
name: Deploy and Register Managed Downstream Fleet
run: |
kubectl config use-context k3d-managed-downstream
host=$(kubectl get node k3d-managed-downstream-server-0 -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')
ca=$( kubectl config view --flatten -o jsonpath='{.clusters[?(@.name == "k3d-managed-downstream")].cluster.certificate-authority-data}' )
client_cert=$( kubectl config view --flatten -o jsonpath='{.users[?(@.name == "admin@k3d-managed-downstream")].user.client-certificate-data}' )
token=$( kubectl config view --flatten -o jsonpath='{.users[?(@.name == "admin@k3d-managed-downstream")].user.client-key-data}' )
server="https://$host:6443"
kubectl config use-context k3d-upstream
value=$(cat <<EOF
apiVersion: v1
kind: Config
current-context: default
clusters:
- cluster:
certificate-authority-data: $ca
server: $server
name: cluster
contexts:
- context:
cluster: cluster
user: user
name: default
preferences: {}
users:
- name: user
user:
client-certificate-data: $client_cert
client-key-data: $token
EOF
)
kubectl create ns fleet-default || true
kubectl delete secret -n fleet-default kbcfg-second || true
# Rancher sets a token value in the secret, but our docs don't mention it
# * https://github.com/rancher/rancher/blob/c24fb8b0869a0b445f55b3307c6ed4582e147747/pkg/provisioningv2/kubeconfig/manager.go#L362
# * https://fleet.rancher.io/0.5/manager-initiated#kubeconfig-secret-1
kubectl create secret generic -n fleet-default kbcfg-second --from-literal=token="$token" --from-literal=value="$value"
kubectl apply -n fleet-default -f - <<EOF
apiVersion: "fleet.cattle.io/v1alpha1"
kind: Cluster
metadata:
name: second
namespace: fleet-default
labels:
name: second
spec:
kubeConfigSecret: kbcfg-second
EOF
-
name: E2E tests
env:
Expand All @@ -135,6 +203,15 @@ jobs:
run: |
kubectl config use-context k3d-upstream
ginkgo --github-output e2e/multi-cluster
-
name: E2E tests with managed downstream agent
env:
FLEET_E2E_NS: fleet-local
FLEET_E2E_NS_DOWNSTREAM: fleet-default
FLEET_E2E_CLUSTER_DOWNSTREAM: k3d-managed-downstream
run: |
kubectl config use-context k3d-upstream
ginkgo --github-output e2e/multi-cluster/installation
-
name: Acceptance Tests for Examples
if: >
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/nightly-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ jobs:
# k3d will automatically create a network named k3d-test-cluster-1 with the range 172.18.0.0/16
with:
k3d-version: ${{ env.SETUP_K3D_VERSION }}
cluster-name: "k3s-default"
cluster-name: "upstream"
args: >-
--agents 1
--network "nw01"
--image docker.io/rancher/k3s:${{matrix.k3s_version}}
-
name: Import Images Into k3d
run: |
./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev nginx-git:test
./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev nginx-git:test -c upstream
-
name: Set Up Tmate Debug Session
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.enable_tmate == 'true' }}
Expand Down
3 changes: 2 additions & 1 deletion charts/fleet-agent/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ data:
{{ if .Values.labels }}
"labels":{{toJson .Values.labels}},
{{ end }}
"clientID":"{{.Values.clientID}}"
"clientID":"{{.Values.clientID}}",
"agentTLSMode": "{{.Values.agentTLSMode}}"
}
4 changes: 4 additions & 0 deletions charts/fleet-agent/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ apiServerURL: ""
# If left empty it is assumed this Kubernetes API TLS is signed by a well known CA.
apiServerCA: ""

# Determines whether the agent should trust CA bundles from the operating system's trust store when connecting to a
# management cluster. Supported values: `system-store` (default, the system trust store is trusted) and `strict`
# (the system trust store is bypassed).
agentTLSMode: "system-store"

# The cluster registration value
token: ""

Expand Down
14 changes: 14 additions & 0 deletions charts/fleet-crd/templates/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5173,6 +5173,20 @@ spec:
used to detect changes.'
nullable: true
type: string
agentTLSMode:
description: 'AgentTLSMode supports two values: `system-store` and
`strict`. If set to
`system-store`, instructs the agent to trust CA bundles from the
operating
system''s store. If set to `strict`, then the agent shall only
connect to a
server which uses the exact CA configured when creating/updating
the agent.'
nullable: true
type: string
agentTolerationsHash:
description: 'AgentTolerationsHash is a hash of the agent''s tolerations
Expand Down
1 change: 1 addition & 0 deletions charts/fleet/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ data:
"apiServerURL": "{{.Values.apiServerURL}}",
"apiServerCA": "{{b64enc .Values.apiServerCA}}",
"agentCheckinInterval": "{{.Values.agentCheckinInterval}}",
"agentTLSMode": "{{.Values.agentTLSMode}}",
"ignoreClusterRegistrationLabels": {{.Values.ignoreClusterRegistrationLabels}},
"bootstrap": {
"paths": "{{.Values.bootstrap.paths}}",
Expand Down
4 changes: 4 additions & 0 deletions charts/fleet/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ apiServerURL: ""
# If left empty it is assumed this Kubernetes API TLS is signed by a well known CA.
apiServerCA: ""

# Determines whether the agent should trust CA bundles from the operating system's trust store when connecting to a
# management cluster. Supported values: `system-store` (default, the system trust store is trusted) and `strict`
# (the system trust store is bypassed).
agentTLSMode: "system-store"

# A duration string for how often agents should report a heartbeat
agentCheckinInterval: "15m"

Expand Down
104 changes: 104 additions & 0 deletions e2e/multi-cluster/installation/agent_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package installation_test

import (
"fmt"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/matchers"
"github.com/rancher/fleet/e2e/testenv/kubectl"
)

var (
// agentMode is the agent TLS mode written into the `fleet-controller` config map by
// JustBeforeEach; each Context sets it in its own BeforeEach.
agentMode string
// kd is a kubectl command bound to the downstream cluster context, used to read agent logs.
kd kubectl.Command
)

// Specs checking how the Fleet agent registration behaves for each agent TLS mode.
//
// Every spec patches the upstream `fleet-controller` config map so that it points at a public
// HTTPS endpoint with no configured CA and the agent TLS mode under test, then inspects the
// `fleet-agent-register` container logs on the downstream cluster.
var _ = Describe("Fleet installation with TLS agent modes", func() {
	BeforeEach(func() {
		kd = env.Kubectl.Context(env.Downstream)
	})

	// JustBeforeEach runs after every BeforeEach, including the nested ones below, so agentMode
	// already holds the mode selected by the enclosing Context when the patch is built.
	JustBeforeEach(func() {
		out, err := ku.Patch(
			"configmap",
			"fleet-controller",
			"-n",
			"cattle-fleet-system",
			"--type=merge",
			"-p",
			fmt.Sprintf(
				`{"data":{"config":"{\"apiServerURL\": \"https://google.com\", \"apiServerCA\": \"\", \"agentTLSMode\": \"%s\"}"}}`,
				agentMode,
			),
		)
		Expect(err).ToNot(HaveOccurred(), string(out))
	})

	Context("with non-strict agent TLS mode", func() {
		When("fetching fleet-agent-register logs", func() {
			BeforeEach(func() {
				agentMode = "system-store"
			})

			It("reaches the server without cert issues", func() {
				// The registration request fails on a missing resource, not on a
				// certificate error.
				Eventually(
					agentRegisterLogsMatch("Failed to register agent.*could not find the requested resource"),
				).Should(BeTrue())
			})
		})
	})

	Context("with strict agent TLS mode", func() {
		When("fetching fleet-agent-register logs", func() {
			BeforeEach(func() {
				agentMode = "strict"
			})

			It("cannot reach the server because the cert is signed by an unknown authority", func() {
				Eventually(
					agentRegisterLogsMatch("Failed to register agent.*signed by unknown authority"),
				).Should(BeTrue())
			})
		})
	})
})

// agentRegisterLogsMatch returns a poll function for Eventually. Each call fetches the full logs
// (`--tail=-1`) of the `fleet-agent-register` container from pods labelled `app=fleet-agent` in
// the `cattle-fleet-system` namespace of the downstream cluster, and reports whether they match
// the regular expression pattern.
//
// Failures to fetch the logs or to evaluate the pattern are reported as "no match" so that
// Eventually keeps polling instead of aborting.
func agentRegisterLogsMatch(pattern string) func() bool {
	return func() bool {
		logs, err := kd.Namespace("cattle-fleet-system").Logs(
			"-l",
			"app=fleet-agent",
			"-c",
			"fleet-agent-register",
			"--tail=-1",
		)
		if err != nil {
			return false
		}

		matcher := matchers.MatchRegexpMatcher{Regexp: pattern}
		matched, err := matcher.Match(logs)

		return err == nil && matched
	}
}
61 changes: 61 additions & 0 deletions e2e/multi-cluster/installation/suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Package installation contains e2e tests deploying Fleet to multiple clusters. The tests use kubectl to apply
// manifests. Expectations are verified by checking cluster resources.
package installation_test

import (
"fmt"
"strings"
"testing"

"github.com/rancher/fleet/e2e/testenv"
"github.com/rancher/fleet/e2e/testenv/kubectl"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestE2E is the `go test` entry point of this suite: it registers Ginkgo's Fail as the Gomega
// failure handler, then runs the specs of the package.
func TestE2E(t *testing.T) {
	const suiteDescription = "E2E Installation Suite for Multi-Cluster"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteDescription)
}

var (
// env is the test environment, created in BeforeSuite.
env *testenv.Env
// ku is a kubectl command bound to the upstream cluster context.
ku kubectl.Command
// config holds the `config` entry of the `fleet-controller` config map as found before the
// suite ran, escaped for embedding in a JSON string; AfterSuite uses it to restore the map.
config string
)

// Suite setup: configures the test environment and records the current `fleet-controller`
// configuration so that AfterSuite can put it back once the specs have modified it.
var _ = BeforeSuite(func() {
	SetDefaultEventuallyTimeout(testenv.Timeout)
	testenv.SetRoot("../..")

	env = testenv.New()
	ku = env.Kubectl.Context(env.Upstream)

	// Save initial state of `fleet-controller` config map
	cfg, err := ku.Get(
		"configmap",
		"fleet-controller",
		"-n",
		"cattle-fleet-system",
		"-o",
		"jsonpath={.data.config}")
	Expect(err).ToNot(HaveOccurred(), cfg)

	// The saved config is later embedded between double quotes in a JSON merge patch, so it must
	// be escaped as the contents of a JSON string. Backslashes are escaped along with quotes:
	// otherwise an already-escaped quote (`\"`) inside the config would become `\\"` and end the
	// string early. Newlines are dropped, as they cannot appear raw inside a JSON string.
	// The replacer works in a single pass, so inserted backslashes are never escaped again.
	config = strings.NewReplacer(
		`\`, `\\`,
		`"`, `\"`,
		"\n", "",
	).Replace(cfg)
})

// Suite teardown: writes the configuration saved in BeforeSuite back into the upstream
// `fleet-controller` config map through a JSON merge patch.
var _ = AfterSuite(func() {
	// config is already escaped, so it can be placed between the quotes as is.
	restorePatch := fmt.Sprintf(`{"data":{"config":"%s"}}`, config)

	out, err := ku.Patch(
		"configmap", "fleet-controller",
		"-n", "cattle-fleet-system",
		"--type=merge",
		"-p", restorePatch,
	)
	Expect(err).ToNot(HaveOccurred(), string(out))
})
Loading

0 comments on commit de3f46a

Please sign in to comment.