Skip to content

Commit

Permalink
Use image pull error in message during back-off
Browse files Browse the repository at this point in the history
The container status waiting reason toggles between `ImagePullBackOff`
and the actual pull error, resulting in a bad user experience for
consumers like kubectl. For example, the output of
`kubectl get pods` does return either:

```
NAME   READY   STATUS                      RESTARTS   AGE
pod    0/1     SignatureValidationFailed   0          10s
```

or

```
NAME   READY   STATUS             RESTARTS   AGE
pod    0/1     ImagePullBackOff   0          18s
```

depending in which state the image pull is. We now improve that behavior
by preserving the actual error in the `message` of the `waiting` state
from the pull during back-off:

```json
{
  "waiting": {
    "message": "Back-off pulling image \"quay.io/crio/unsigned:latest\": SignatureValidationFailed: image pull failed for quay.io/crio/unsigned:latest because the signature validation failed: Source
 image rejected: A signature was required, but no signature exists",
    "reason": "ImagePullBackOff"
  }
}
```

While the `SignatureValidationFailed` value inherits from the previous
known state:

```json
{
  "waiting": {
    "message": "image pull failed for quay.io/crio/unsigned:latest because the signature validation failed: Source image rejected: A signature was required, but no signature exists",
    "reason": "SignatureValidationFailed"
  }
}
```

Signed-off-by: Sascha Grunert <[email protected]>
  • Loading branch information
saschagrunert committed Oct 18, 2024
1 parent 07e7368 commit 0fc4b74
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 35 deletions.
26 changes: 23 additions & 3 deletions pkg/kubelet/images/image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"strings"
"sync"
"time"

v1 "k8s.io/api/core/v1"
Expand All @@ -43,9 +44,11 @@ type ImagePodPullingTimeRecorder interface {

// imageManager provides the functionalities for image pulling.
type imageManager struct {
recorder record.EventRecorder
imageService kubecontainer.ImageService
backOff *flowcontrol.Backoff
recorder record.EventRecorder
imageService kubecontainer.ImageService
backOff *flowcontrol.Backoff
prevPullErrMsg sync.Map

// It will check the presence of the image, and report the 'image pulling', image pulled' events correspondingly.
puller imagePuller

Expand Down Expand Up @@ -154,8 +157,20 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
if m.backOff.IsInBackOffSinceUpdate(backOffKey, m.backOff.Clock.Now()) {
msg := fmt.Sprintf("Back-off pulling image %q", imgRef)
m.logIt(objRef, v1.EventTypeNormal, events.BackOffPullImage, logPrefix, msg, klog.Info)

// Wrap the error from the actual pull if available.
// This information is populated to the pods
// .status.containerStatuses[*].state.waiting.message.
prevPullErrMsg, ok := m.prevPullErrMsg.Load(backOffKey)
if ok {
msg = fmt.Sprintf("%s: %s", msg, prevPullErrMsg)
}

return "", msg, ErrImagePullBackOff
}
// Ensure that the map cannot grow indefinitely.
m.prevPullErrMsg.Delete(backOffKey)

m.podPullingTimeRecorder.RecordImageStartedPulling(pod.UID)
m.logIt(objRef, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("Pulling image %q", imgRef), klog.Info)
startTime := time.Now()
Expand All @@ -167,6 +182,11 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
m.backOff.Next(backOffKey, m.backOff.Clock.Now())

msg, err := evalCRIPullErr(imgRef, imagePullResult.err)

// Store the actual pull error for providing that information during
// the image pull back-off.
m.prevPullErrMsg.Store(backOffKey, fmt.Sprintf("%s: %s", err, msg))

return "", msg, err
}
m.podPullingTimeRecorder.RecordImageFinishedPulling(pod.UID)
Expand Down
99 changes: 67 additions & 32 deletions pkg/kubelet/images/image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type pullerExpects struct {
shouldRecordStartedPullingTime bool
shouldRecordFinishedPullingTime bool
events []v1.Event
msg string
}

type pullerTestCase struct {
Expand Down Expand Up @@ -77,7 +78,7 @@ func pullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Pulled"},
}},
}, ""},
}},

{ // image present, don't pull
Expand All @@ -92,15 +93,15 @@ func pullerTestCases() []pullerTestCase {
{[]string{"GetImageRef"}, nil, false, false,
[]v1.Event{
{Reason: "Pulled"},
}},
}, ""},
{[]string{"GetImageRef"}, nil, false, false,
[]v1.Event{
{Reason: "Pulled"},
}},
}, ""},
{[]string{"GetImageRef"}, nil, false, false,
[]v1.Event{
{Reason: "Pulled"},
}},
}, ""},
}},
// image present, pull it
{containerImage: "present_image",
Expand All @@ -115,17 +116,17 @@ func pullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Pulled"},
}},
}, ""},
{[]string{"PullImage", "GetImageSize"}, nil, true, true,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Pulled"},
}},
}, ""},
{[]string{"PullImage", "GetImageSize"}, nil, true, true,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Pulled"},
}},
}, ""},
}},
// missing image, error PullNever
{containerImage: "missing_image",
Expand All @@ -139,15 +140,15 @@ func pullerTestCases() []pullerTestCase {
{[]string{"GetImageRef"}, ErrImageNeverPull, false, false,
[]v1.Event{
{Reason: "ErrImageNeverPull"},
}},
}, ""},
{[]string{"GetImageRef"}, ErrImageNeverPull, false, false,
[]v1.Event{
{Reason: "ErrImageNeverPull"},
}},
}, ""},
{[]string{"GetImageRef"}, ErrImageNeverPull, false, false,
[]v1.Event{
{Reason: "ErrImageNeverPull"},
}},
}, ""},
}},
// missing image, unable to inspect
{containerImage: "missing_image",
Expand All @@ -161,15 +162,15 @@ func pullerTestCases() []pullerTestCase {
{[]string{"GetImageRef"}, ErrImageInspect, false, false,
[]v1.Event{
{Reason: "InspectFailed"},
}},
}, ""},
{[]string{"GetImageRef"}, ErrImageInspect, false, false,
[]v1.Event{
{Reason: "InspectFailed"},
}},
}, ""},
{[]string{"GetImageRef"}, ErrImageInspect, false, false,
[]v1.Event{
{Reason: "InspectFailed"},
}},
}, ""},
}},
// missing image, unable to fetch
{containerImage: "typo_image",
Expand All @@ -184,29 +185,29 @@ func pullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}},
}, ""},
{[]string{"GetImageRef", "PullImage"}, ErrImagePull, true, false,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}},
}, ""},
{[]string{"GetImageRef"}, ErrImagePullBackOff, false, false,
[]v1.Event{
{Reason: "BackOff"},
}},
}, ""},
{[]string{"GetImageRef", "PullImage"}, ErrImagePull, true, false,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}},
}, ""},
{[]string{"GetImageRef"}, ErrImagePullBackOff, false, false,
[]v1.Event{
{Reason: "BackOff"},
}},
}, ""},
{[]string{"GetImageRef"}, ErrImagePullBackOff, false, false,
[]v1.Event{
{Reason: "BackOff"},
}},
}, ""},
}},
// image present, non-zero qps, try to pull
{containerImage: "present_image",
Expand All @@ -221,17 +222,17 @@ func pullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Pulled"},
}},
}, ""},
{[]string{"PullImage", "GetImageSize"}, nil, true, true,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Pulled"},
}},
}, ""},
{[]string{"PullImage", "GetImageSize"}, nil, true, true,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Pulled"},
}},
}, ""},
}},
// image present, non-zero qps, try to pull when qps exceeded
{containerImage: "present_image",
Expand All @@ -246,16 +247,16 @@ func pullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}},
}, ""},
{[]string(nil), ErrImagePull, true, false,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}},
}, ""},
{[]string(nil), ErrImagePullBackOff, false, false,
[]v1.Event{
{Reason: "BackOff"},
}},
}, ""},
}},
// error case if image name fails validation due to invalid reference format
{containerImage: "FAILED_IMAGE",
Expand All @@ -269,7 +270,7 @@ func pullerTestCases() []pullerTestCase {
{[]string(nil), ErrInvalidImageName, false, false,
[]v1.Event{
{Reason: "InspectFailed"},
}},
}, ""},
}},
// error case if image name contains http
{containerImage: "http://url",
Expand All @@ -283,7 +284,7 @@ func pullerTestCases() []pullerTestCase {
{[]string(nil), ErrInvalidImageName, false, false,
[]v1.Event{
{Reason: "InspectFailed"},
}},
}, ""},
}},
// error case if image name contains sha256
{containerImage: "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad",
Expand All @@ -297,7 +298,39 @@ func pullerTestCases() []pullerTestCase {
{[]string(nil), ErrInvalidImageName, false, false,
[]v1.Event{
{Reason: "InspectFailed"},
}},
}, ""},
}},
{containerImage: "typo_image",
testName: "image missing, SignatureValidationFailed",
policy: v1.PullIfNotPresent,
inspectErr: nil,
pullerErr: crierrors.ErrSignatureValidationFailed,
qps: 0.0,
burst: 0,
expected: []pullerExpects{
{[]string{"GetImageRef", "PullImage"}, crierrors.ErrSignatureValidationFailed, true, false,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}, "image pull failed for typo_image because the signature validation failed"},
{[]string{"GetImageRef", "PullImage"}, crierrors.ErrSignatureValidationFailed, true, false,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}, "image pull failed for typo_image because the signature validation failed"},
{[]string{"GetImageRef"}, ErrImagePullBackOff, false, false,
[]v1.Event{
{Reason: "BackOff"},
}, "Back-off pulling image \"typo_image\": SignatureValidationFailed: image pull failed for typo_image because the signature validation failed"},
{[]string{"GetImageRef", "PullImage"}, crierrors.ErrSignatureValidationFailed, true, false,
[]v1.Event{
{Reason: "Pulling"},
{Reason: "Failed"},
}, "image pull failed for typo_image because the signature validation failed"},
{[]string{"GetImageRef"}, ErrImagePullBackOff, false, false,
[]v1.Event{
{Reason: "BackOff"},
}, "Back-off pulling image \"typo_image\": SignatureValidationFailed: image pull failed for typo_image because the signature validation failed"},
}},
}
}
Expand Down Expand Up @@ -372,11 +405,12 @@ func TestParallelPuller(t *testing.T) {
fakeRuntime.CalledFunctions = nil
fakeClock.Step(time.Second)

_, _, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy)
_, msg, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy)
fakeRuntime.AssertCalls(expected.calls)
assert.Equal(t, expected.err, err)
assert.Equal(t, expected.shouldRecordStartedPullingTime, fakePodPullingTimeRecorder.startedPullingRecorded)
assert.Equal(t, expected.shouldRecordFinishedPullingTime, fakePodPullingTimeRecorder.finishedPullingRecorded)
assert.Contains(t, msg, expected.msg)
fakePodPullingTimeRecorder.reset()
}
})
Expand Down Expand Up @@ -404,11 +438,12 @@ func TestSerializedPuller(t *testing.T) {
fakeRuntime.CalledFunctions = nil
fakeClock.Step(time.Second)

_, _, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy)
_, msg, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy)
fakeRuntime.AssertCalls(expected.calls)
assert.Equal(t, expected.err, err)
assert.Equal(t, expected.shouldRecordStartedPullingTime, fakePodPullingTimeRecorder.startedPullingRecorded)
assert.Equal(t, expected.shouldRecordFinishedPullingTime, fakePodPullingTimeRecorder.finishedPullingRecorded)
assert.Contains(t, msg, expected.msg)
fakePodPullingTimeRecorder.reset()
}
})
Expand Down Expand Up @@ -456,7 +491,7 @@ func TestPullAndListImageWithPodAnnotations(t *testing.T) {
inspectErr: nil,
pullerErr: nil,
expected: []pullerExpects{
{[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil},
{[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil, ""},
}}

useSerializedEnv := true
Expand Down Expand Up @@ -512,7 +547,7 @@ func TestPullAndListImageWithRuntimeHandlerInImageCriAPIFeatureGate(t *testing.T
inspectErr: nil,
pullerErr: nil,
expected: []pullerExpects{
{[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil},
{[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil, ""},
}}

useSerializedEnv := true
Expand Down

0 comments on commit 0fc4b74

Please sign in to comment.