Skip to content

Commit

Permalink
Merge pull request kubernetes#111113 from mimowo/retriable-pod-failur…
Browse files Browse the repository at this point in the history
…es-job-controller

Support handling of pod failures with respect to the configured rules
  • Loading branch information
k8s-ci-robot authored Aug 4, 2022
2 parents d0c92ae + bf9ce70 commit eefcf6a
Show file tree
Hide file tree
Showing 43 changed files with 5,960 additions and 153 deletions.
92 changes: 92 additions & 0 deletions api/openapi-spec/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

115 changes: 115 additions & 0 deletions api/openapi-spec/v3/apis__batch__v1_openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,14 @@
"format": "int32",
"type": "integer"
},
"podFailurePolicy": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.batch.v1.PodFailurePolicy"
}
],
"description": "Specifies the policy of handling failed pods. In particular, it allows to specify the set of actions and conditions which need to be satisfied to take the associated action. If empty, the default behaviour applies - the counter of failed pods, represented by the jobs's .status.failed field, is incremented and it is checked against the backoffLimit. This field cannot be used in combination with restartPolicy=OnFailure.\n\nThis field is alpha-level. To use this field, you must enable the `JobPodFailurePolicy` feature gate (disabled by default)."
},
"selector": {
"allOf": [
{
Expand Down Expand Up @@ -475,6 +483,113 @@
},
"type": "object"
},
"io.k8s.api.batch.v1.PodFailurePolicy": {
"description": "PodFailurePolicy describes how failed pods influence the backoffLimit.",
"properties": {
"rules": {
"description": "A list of pod failure policy rules. The rules are evaluated in order. Once a rule matches a Pod failure, the remaining of the rules are ignored. When no rule matches the Pod failure, the default handling applies - the counter of pod failures is incremented and it is checked against the backoffLimit. At most 20 elements are allowed.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.batch.v1.PodFailurePolicyRule"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
}
},
"required": [
"rules"
],
"type": "object"
},
"io.k8s.api.batch.v1.PodFailurePolicyOnExitCodesRequirement": {
"description": "PodFailurePolicyOnExitCodesRequirement describes the requirement for handling a failed pod based on its container exit codes. In particular, it lookups the .state.terminated.exitCode for each app container and init container status, represented by the .status.containerStatuses and .status.initContainerStatuses fields in the Pod status, respectively. Containers completed with success (exit code 0) are excluded from the requirement check.",
"properties": {
"containerName": {
"description": "Restricts the check for exit codes to the container with the specified name. When null, the rule applies to all containers. When specified, it should match one the container or initContainer names in the pod template.",
"type": "string"
},
"operator": {
"default": "",
"description": "Represents the relationship between the container exit code(s) and the specified values. Containers completed with success (exit code 0) are excluded from the requirement check. Possible values are: - In: the requirement is satisfied if at least one container exit code\n (might be multiple if there are multiple containers not restricted\n by the 'containerName' field) is in the set of specified values.\n- NotIn: the requirement is satisfied if at least one container exit code\n (might be multiple if there are multiple containers not restricted\n by the 'containerName' field) is not in the set of specified values.\nAdditional values are considered to be added in the future. Clients should react to an unknown operator by assuming the requirement is not satisfied.\n\n",
"type": "string"
},
"values": {
"description": "Specifies the set of values. Each returned container exit code (might be multiple in case of multiple containers) is checked against this set of values with respect to the operator. The list of values must be ordered and must not contain duplicates. Value '0' cannot be used for the In operator. At least one element is required. At most 255 elements are allowed.",
"items": {
"default": 0,
"format": "int32",
"type": "integer"
},
"type": "array",
"x-kubernetes-list-type": "set"
}
},
"required": [
"operator",
"values"
],
"type": "object"
},
"io.k8s.api.batch.v1.PodFailurePolicyOnPodConditionsPattern": {
"description": "PodFailurePolicyOnPodConditionsPattern describes a pattern for matching an actual pod condition type.",
"properties": {
"status": {
"default": "",
"description": "Specifies the required Pod condition status. To match a pod condition it is required that the specified status equals the pod condition status. Defaults to True.",
"type": "string"
},
"type": {
"default": "",
"description": "Specifies the required Pod condition type. To match a pod condition it is required that specified type equals the pod condition type.",
"type": "string"
}
},
"required": [
"type",
"status"
],
"type": "object"
},
"io.k8s.api.batch.v1.PodFailurePolicyRule": {
"description": "PodFailurePolicyRule describes how a pod failure is handled when the requirements are met. One of OnExitCodes and onPodConditions, but not both, can be used in each rule.",
"properties": {
"action": {
"default": "",
"description": "Specifies the action taken on a pod failure when the requirements are satisfied. Possible values are: - FailJob: indicates that the pod's job is marked as Failed and all\n running pods are terminated.\n- Ignore: indicates that the counter towards the .backoffLimit is not\n incremented and a replacement pod is created.\n- Count: indicates that the pod is handled in the default way - the\n counter towards the .backoffLimit is incremented.\nAdditional values are considered to be added in the future. Clients should react to an unknown action by skipping the rule.\n\n",
"type": "string"
},
"onExitCodes": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.batch.v1.PodFailurePolicyOnExitCodesRequirement"
}
],
"description": "Represents the requirement on the container exit codes."
},
"onPodConditions": {
"description": "Represents the requirement on the pod conditions. The requirement is represented as a list of pod condition patterns. The requirement is satisfied if at least one pattern matches an actual pod condition. At most 20 elements are allowed.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.batch.v1.PodFailurePolicyOnPodConditionsPattern"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
}
},
"required": [
"action",
"onPodConditions"
],
"type": "object"
},
"io.k8s.api.batch.v1.UncountedTerminatedPods": {
"description": "UncountedTerminatedPods holds UIDs of Pods that have terminated but haven't been accounted in Job status counters.",
"properties": {
Expand Down
Loading

0 comments on commit eefcf6a

Please sign in to comment.