Skip to content

Commit

Permalink
Perform challenge callbacks into a node
Browse files Browse the repository at this point in the history
In order to verify that the caller is running on the specified node,
we source the expected IP address from the cloud, and require that the
node set up a simple challenge/response server to answer requests.

Because the challenge server runs on a port outside of the nodePort
range, this also makes it harder for pods to impersonate their host
nodes - though we do combine this with TPM and similar functionality
where it is available.
  • Loading branch information
justinsb committed May 6, 2023
1 parent b8a6bd4 commit c67f895
Show file tree
Hide file tree
Showing 23 changed files with 653 additions and 18 deletions.
29 changes: 23 additions & 6 deletions cmd/kops-controller/pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ type Server struct {

// uncachedClient is an uncached client for the kube apiserver
uncachedClient client.Client

// challengeClient performs our callback-challenge into the node
challengeClient *bootstrap.ChallengeClient
}

var _ manager.LeaderElectionRunnable = &Server{}
Expand Down Expand Up @@ -94,6 +97,17 @@ func NewServer(opt *config.Options, verifier bootstrap.Verifier, uncachedClient
}
s.secretStore = secrets.NewVFSSecretStore(nil, p)

s.keystore, s.keypairIDs, err = newKeystore(opt.Server.CABasePath, opt.Server.SigningCAs)
if err != nil {
return nil, err
}

challengeClient, err := bootstrap.NewChallengeClient(s.keystore)
if err != nil {
return nil, err
}
s.challengeClient = challengeClient

r := http.NewServeMux()
r.Handle("/bootstrap", http.HandlerFunc(s.bootstrap))
server.Handler = recovery(r)
Expand All @@ -106,12 +120,6 @@ func (s *Server) NeedLeaderElection() bool {
}

func (s *Server) Start(ctx context.Context) error {
var err error
s.keystore, s.keypairIDs, err = newKeystore(s.opt.Server.CABasePath, s.opt.Server.SigningCAs)
if err != nil {
return err
}

go func() {
<-ctx.Done()

Expand Down Expand Up @@ -198,6 +206,15 @@ func (s *Server) bootstrap(w http.ResponseWriter, r *http.Request) {
return
}

if err := s.challengeClient.DoCallbackChallenge(ctx, s.opt.ClusterName, id.ChallengeEndpoint, req); err != nil {
klog.Infof("bootstrap %s callback challenge failed: %v", r.RemoteAddr, err)
w.WriteHeader(http.StatusBadRequest)
_, _ = w.Write([]byte("callback failed"))
return
}

klog.Infof("performed successful callback challenge with %s; identified as %s", id.ChallengeEndpoint, id.NodeName)

resp := &nodeup.BootstrapResponse{
Certs: map[string]string{},
}
Expand Down
2 changes: 2 additions & 0 deletions nodeup/pkg/model/bootstrap_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ func (b BootstrapClientBuilder) Build(c *fi.NodeupModelBuilderContext) error {
Certs: b.bootstrapCerts,
KeypairIDs: b.bootstrapKeypairIDs,
}
bootstrapClientTask.UseChallengeCallback = b.UseChallengeCallback()
bootstrapClientTask.ClusterName = b.NodeupConfig.ClusterName

for _, cert := range b.bootstrapCerts {
cert.Cert.Task = bootstrapClientTask
Expand Down
5 changes: 5 additions & 0 deletions nodeup/pkg/model/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,11 @@ func (c *NodeupModelContext) UseKopsControllerForNodeBootstrap() bool {
return model.UseKopsControllerForNodeBootstrap(c.Cluster)
}

// UseChallengeCallback reports whether a callback challenge should be used
// during node provisioning with kops-controller.
// The decision is delegated to model.UseChallengeCallback.
func (c *NodeupModelContext) UseChallengeCallback() bool {
	enabled := model.UseChallengeCallback()
	return enabled
}

// UsesSecondaryIP checks if the CNI in use attaches secondary interfaces to the host.
func (c *NodeupModelContext) UsesSecondaryIP() bool {
return (c.NodeupConfig.Networking.CNI != nil && c.NodeupConfig.Networking.CNI.UsesSecondaryIP) ||
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/kops/model/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ func UseKopsControllerForNodeBootstrap(cluster *kops.Cluster) bool {
}
}

// UseChallengeCallback reports whether a callback challenge should be used
// during node provisioning with kops-controller.
// It is unconditionally enabled.
func UseChallengeCallback() bool {
	const enabled = true
	return enabled
}

// UseKopsControllerForNodeConfig checks if nodeup should use kops-controller to get nodeup.Config.
func UseKopsControllerForNodeConfig(cluster *kops.Cluster) bool {
switch cluster.Spec.GetCloudProvider() {
Expand Down
11 changes: 11 additions & 0 deletions pkg/apis/nodeup/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ type BootstrapRequest struct {
// IncludeNodeConfig controls whether the cluster & instance group configuration should be returned.
// This allows for nodes without access to the kops state store.
IncludeNodeConfig bool `json:"includeNodeConfig"`

// Challenge is for a callback challenge.
Challenge *ChallengeRequest `json:"challenge,omitempty"`
}

// ChallengeRequest describes the callback challenge.
// It is carried in BootstrapRequest.Challenge.
type ChallengeRequest struct {
// Endpoint is the challenge endpoint supplied by the requester.
// NOTE(review): presumably the address of the node's challenge server — confirm.
Endpoint string `json:"endpoint,omitempty"`
// ServerCA is the certificate material used to verify the challenge server's
// TLS certificate; the challenge client loads it as PEM into its root pool.
ServerCA []byte `json:"ca,omitempty"`
// ChallengeID identifies the challenge; it is sent back to the node in the callback request.
ChallengeID string `json:"challengeID,omitempty"`
// ChallengeSecret is combined with a nonce chosen by the challenge client
// to compute the expected challenge response.
ChallengeSecret []byte `json:"challengeSecret,omitempty"`
}

// BootstrapResponse is a response to a BootstrapRequest.
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/nodeup/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ type BootConfig struct {
// APIServerIPs is the API server IP addresses.
// This field is used for adding an alias for api.internal. in /etc/hosts, when Topology.DNS.Type == DNSTypeNone.
APIServerIPs []string `json:",omitempty"`
// ClusterName is the name of the cluster.
ClusterName string `json:",omitempty"`
// InstanceGroupName is the name of the instance group.
InstanceGroupName string `json:",omitempty"`
// InstanceGroupRole is the instance group role.
Expand Down Expand Up @@ -200,6 +202,7 @@ func NewConfig(cluster *kops.Cluster, instanceGroup *kops.InstanceGroup) (*Confi

bootConfig := BootConfig{
CloudProvider: cluster.Spec.GetCloudProvider(),
ClusterName: cluster.ObjectMeta.Name,
InstanceGroupName: instanceGroup.ObjectMeta.Name,
InstanceGroupRole: role,
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/bootstrap/authenticate.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ type VerifyResult struct {

// CertificateNames is the alternate names the node is authorized to use for certificates.
CertificateNames []string

// ChallengeEndpoint is a valid endpoint to which we should issue a challenge request,
// corresponding to the node the request identified as.
// This should be sourced from e.g. the cloud, and acts as a cross-check
// that this is the correct instance.
ChallengeEndpoint string
}

// Verifier verifies authentication credentials for requests.
Expand Down
101 changes: 101 additions & 0 deletions pkg/bootstrap/challenge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package bootstrap

import (
cryptorand "crypto/rand"
"crypto/tls"
"crypto/x509"
"crypto/x509/pkix"
"fmt"
"math/big"
"time"

"k8s.io/klog/v2"
"k8s.io/kops/pkg/pki"
)

// randomBytes returns length bytes read from crypto/rand.
// If the random source cannot be read, the failure is reported through klog.Fatalf.
func randomBytes(length int) []byte {
	buf := make([]byte, length)
	_, err := cryptorand.Read(buf)
	if err != nil {
		klog.Fatalf("failed to read from crypto/rand: %v", err)
	}
	return buf
}

// challengeKopsControllerSubject returns the certificate subject that
// kops-controller uses for the given cluster: only the common name is set,
// to "kops-controller." followed by clusterName.
func challengeKopsControllerSubject(clusterName string) pkix.Name {
	// Note: keep in sync with subjectsMatch if you add (additional) fields here
	var subject pkix.Name
	subject.CommonName = "kops-controller." + clusterName
	return subject
}

// subjectsMatch reports whether two certificate subjects are equal for the
// purposes of the challenge. Only the common name is compared.
func subjectsMatch(l, r pkix.Name) bool {
	// We need to check all the fields in challengeKopsControllerSubject
	sameCommonName := l.CommonName == r.CommonName
	return sameCommonName
}

// challengeServerHostName returns the host name used for the challenge
// server of the given cluster: "challenge-server." followed by clusterName.
func challengeServerHostName(clusterName string) string {
	const prefix = "challenge-server."
	return prefix + clusterName
}

// BuildChallengeServerCertificate creates a fresh private key and a
// self-signed TLS server certificate for the challenge server of clusterName.
//
// The certificate's common name and only DNS SAN are the challenge server
// host name for the cluster. It is valid from 15 minutes before the call
// until one hour after that start time.
// The returned tls.Certificate has its Leaf populated.
func BuildChallengeServerCertificate(clusterName string) (*tls.Certificate, error) {
	hostName := challengeServerHostName(clusterName)

	key, err := pki.GeneratePrivateKey()
	if err != nil {
		return nil, fmt.Errorf("generating ecdsa key: %w", err)
	}

	// Back-date the start of validity by 15 minutes.
	validFrom := time.Now().Add(-15 * time.Minute)

	template := x509.Certificate{
		SerialNumber: big.NewInt(1),
		Subject:      pkix.Name{CommonName: hostName},
		DNSNames:     []string{hostName},

		NotBefore: validFrom,
		NotAfter:  validFrom.Add(time.Hour),

		KeyUsage:              x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
		BasicConstraintsValid: true,
	}

	// The template is passed as its own parent, so the certificate is self-signed.
	certDER, err := x509.CreateCertificate(cryptorand.Reader, &template, &template, key.Key.Public(), key.Key)
	if err != nil {
		return nil, fmt.Errorf("failed to create certificate: %w", err)
	}

	leaf, err := x509.ParseCertificate(certDER)
	if err != nil {
		return nil, fmt.Errorf("failed to parse certificate: %w", err)
	}

	return &tls.Certificate{
		Certificate: [][]byte{leaf.Raw},
		PrivateKey:  key.Key,
		Leaf:        leaf,
	}, nil
}
128 changes: 128 additions & 0 deletions pkg/bootstrap/challenge_client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package bootstrap

import (
"context"
"crypto/subtle"
"crypto/tls"
"crypto/x509"
"fmt"
"time"

"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"k8s.io/kops/pkg/apis/nodeup"
"k8s.io/kops/pkg/pki"
pb "k8s.io/kops/proto/kops/bootstrap/v1"
"k8s.io/kops/upup/pkg/fi"
)

// ChallengeClient performs callback challenges against a node's challenge server.
type ChallengeClient struct {
// keystore is passed to pki.IssueCert when issuing the client certificate
// presented during the callback.
keystore pki.Keystore
}

// NewChallengeClient builds a ChallengeClient that issues its client
// certificates from the given keystore. The returned error is always nil.
func NewChallengeClient(keystore pki.Keystore) (*ChallengeClient, error) {
	client := new(ChallengeClient)
	client.keystore = keystore
	return client, nil
}

// getClientCertificate issues a new client certificate, valid for one hour,
// with the kops-controller subject for clusterName. The certificate is issued
// through the client's keystore using the fi.CertificateIDCA signer.
// The returned tls.Certificate has its Leaf populated.
func (c *ChallengeClient) getClientCertificate(ctx context.Context, clusterName string) (*tls.Certificate, error) {
	issueRequest := &pki.IssueCertRequest{
		Signer:   fi.CertificateIDCA,
		Type:     "client",
		Subject:  challengeKopsControllerSubject(clusterName),
		Validity: 1 * time.Hour,
	}

	issued, key, _, err := pki.IssueCert(ctx, issueRequest, c.keystore)
	if err != nil {
		return nil, fmt.Errorf("error creating certificate: %w", err)
	}

	// TODO: Caching and rotation
	return &tls.Certificate{
		Certificate: [][]byte{issued.Certificate.Raw},
		PrivateKey:  key.Key,
		Leaf:        issued.Certificate,
	}, nil
}

// DoCallbackChallenge calls back into the node at targetEndpoint and checks
// that it can answer the challenge described by bootstrapRequest.Challenge.
//
// The connection uses TLS: the server is verified against the CA material
// supplied in the challenge, under the challenge server host name for
// clusterName, and a freshly issued client certificate is presented.
// A random nonce is sent along with the challenge ID; the response must equal
// the value derived from the challenge secret and that nonce.
//
// It returns an error if any challenge field is missing, the client
// certificate cannot be issued, the CA material cannot be parsed, the
// connection or RPC fails, or the response does not match.
func (c *ChallengeClient) DoCallbackChallenge(ctx context.Context, clusterName string, targetEndpoint string, bootstrapRequest *nodeup.BootstrapRequest) error {
	challenge := bootstrapRequest.Challenge

	if challenge == nil {
		return fmt.Errorf("challenge not set")
	}
	if challenge.ChallengeID == "" {
		return fmt.Errorf("challenge.id not set")
	}
	if len(challenge.ChallengeSecret) == 0 {
		return fmt.Errorf("challenge.secret not set")
	}
	// NOTE(review): challenge.Endpoint is only checked for presence; the
	// connection below goes to targetEndpoint. Confirm whether the two
	// should also be compared.
	if challenge.Endpoint == "" {
		return fmt.Errorf("challenge.endpoint not set")
	}
	if len(challenge.ServerCA) == 0 {
		return fmt.Errorf("challenge.ca not set")
	}

	clientCertificate, err := c.getClientCertificate(ctx, clusterName)
	if err != nil {
		return err
	}

	serverCAs := x509.NewCertPool()
	if !serverCAs.AppendCertsFromPEM(challenge.ServerCA) {
		return fmt.Errorf("error loading certificate pool")
	}

	serverName := challengeServerHostName(clusterName)
	tlsConfig := &tls.Config{
		RootCAs:      serverCAs,
		Certificates: []tls.Certificate{*clientCertificate},
		ServerName:   serverName,
	}

	// A fresh nonce per call, mixed into the expected response.
	kopsControllerNonce := randomBytes(16)
	req := &pb.ChallengeRequest{
		ChallengeId:     challenge.ChallengeID,
		ChallengeRandom: kopsControllerNonce,
	}

	expectedChallengeResponse := buildChallengeResponse(challenge.ChallengeSecret, kopsControllerNonce)

	var opts []grpc.DialOption
	opts = append(opts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)))
	conn, err := grpc.DialContext(ctx, targetEndpoint, opts...)
	if err != nil {
		return fmt.Errorf("error dialing target %q: %w", targetEndpoint, err)
	}
	// Release the connection once the challenge is done; without this every
	// call leaks a client connection. The Close error is ignored because the
	// challenge outcome has already been decided by then.
	defer conn.Close()

	client := pb.NewCallbackServiceClient(conn)

	response, err := client.Challenge(ctx, req)
	if err != nil {
		return fmt.Errorf("error from callback challenge: %w", err)
	}

	// Constant-time comparison, so the check does not leak how many bytes matched.
	if subtle.ConstantTimeCompare(response.GetChallengeResponse(), expectedChallengeResponse) != 1 {
		return fmt.Errorf("callback challenge returned wrong result")
	}
	return nil
}
Loading

0 comments on commit c67f895

Please sign in to comment.