Skip to content

Commit

Permalink
Merge pull request aws-samples#16 from aws-samples/cluster-autoscaling
Browse files Browse the repository at this point in the history
Cluster autoscaling
  • Loading branch information
dorukozturk authored Feb 2, 2023
2 parents 3e6d16c + 3c54343 commit 887ef5f
Show file tree
Hide file tree
Showing 34 changed files with 19,914 additions and 12 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## v0.8.0 (2023-02-02)

### Feat

- Add check for managed node groups
- Add check for CA role polp
- Add check for separate IRSA for CA
- Add check for CA autodiscovery
- Add check for CA-k8s version mismatch
- Add check for cluster-autoscaler or karpenter

## v0.7.2 (2023-01-11)

### Refactor
Expand Down
Empty file.
222 changes: 222 additions & 0 deletions hardeneks/cluster_wide/cluster_autoscaling/cluster_autoscaler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
import re

import boto3
from kubernetes import client
from rich.panel import Panel

from hardeneks import console
from ...resources import Resources
from ...report import print_role_action_table, print_node_table


def _extract_actions(policy_document):
    """Return every ``Action`` entry listed in an IAM policy document."""
    statements = policy_document.get("Statement", [])
    # IAM accepts a single statement object in place of a list of statements.
    if isinstance(statements, dict):
        statements = [statements]

    actions = []
    for statement in statements:
        # Statements written with ``NotAction`` carry no ``Action`` key.
        action = statement.get("Action")
        if isinstance(action, str):
            actions.append(action)
        elif isinstance(action, list):
            actions.extend(action)
    return actions


def _get_policy_documents_for_role(role_name, iam_client):
    """Collect the actions named by all policies of an IAM role.

    Both managed policies attached to the role (default version only) and
    the role's inline policies are inspected.

    Args:
        role_name: Name of the IAM role (not its ARN).
        iam_client: IAM client exposing ``list_attached_role_policies``,
            ``list_role_policies``, ``get_policy``, ``get_policy_version``
            and ``get_role_policy``.

    Returns:
        A list of action strings, attached policies first and inline
        policies second. Duplicates are kept. The statement ``Effect`` is
        not considered.

    NOTE: only the first response of each ``list_*`` call is read;
    truncated (paginated) listings are not followed.
    """
    attached_policies = iam_client.list_attached_role_policies(
        RoleName=role_name
    )["AttachedPolicies"]
    inline_policies = iam_client.list_role_policies(RoleName=role_name)[
        "PolicyNames"
    ]

    actions = []
    for policy in attached_policies:
        policy_arn = policy["PolicyArn"]
        version_id = iam_client.get_policy(PolicyArn=policy_arn)["Policy"][
            "DefaultVersionId"
        ]
        document = iam_client.get_policy_version(
            PolicyArn=policy_arn, VersionId=version_id
        )["PolicyVersion"]["Document"]
        actions.extend(_extract_actions(document))

    for policy_name in inline_policies:
        document = iam_client.get_role_policy(
            RoleName=role_name, PolicyName=policy_name
        )["PolicyDocument"]
        actions.extend(_extract_actions(document))

    return actions


def check_any_cluster_autoscaler_exists(resources: Resources):
    """Check that Cluster Autoscaler or Karpenter is deployed.

    Looks, across all namespaces, for a Deployment named exactly
    ``cluster-autoscaler`` or ``karpenter`` and prints a warning panel when
    neither is present.

    Args:
        resources: Cluster context passed to every rule; not read here.

    Returns:
        ``True`` when at least one of the two deployments exists,
        ``False`` otherwise.
    """
    deployment_names = {
        deployment.metadata.name
        for deployment in client.AppsV1Api()
        .list_deployment_for_all_namespaces()
        .items
    }

    if deployment_names & {"cluster-autoscaler", "karpenter"}:
        return True

    console.print(
        Panel(
            "[red]Cluster Autoscaler or Karpenter is not deployed.",
            subtitle="[link=https://aws.github.io/aws-eks-best-practices/cluster-autoscaling/]Click to see the guide[/link]",
        )
    )
    console.print()
    return False


def _image_minor_version(image):
    """Return the ``major.minor`` version in an image tag, or ``None``."""
    # Drop a trailing digest (``@sha256:...``) so it is not taken for a tag.
    reference = image.split("@", 1)[0]
    # Only the last path segment can carry a tag; a colon earlier in the
    # reference belongs to a registry port.
    _, separator, tag = reference.rpartition("/")[2].rpartition(":")
    if not separator:
        return None
    match = re.search(r"\d+\.\d+", tag)
    return match.group(0) if match else None


def ensure_cluster_autoscaler_and_cluster_versions_match(resources: Resources):
    """Check that Cluster Autoscaler targets the cluster's Kubernetes version.

    Compares the ``major.minor`` version parsed from the image tag of the
    first container of the ``cluster-autoscaler`` Deployment with the version
    reported by ``eks.describe_cluster`` and prints a warning panel on
    mismatch. Only the first matching deployment is examined.

    Args:
        resources: Cluster context; ``region`` and ``cluster`` are read.

    Returns:
        ``False`` when the versions differ (or the image carries no
        recognisable version tag), ``True`` when they match or when no
        ``cluster-autoscaler`` deployment exists.
    """
    eks_client = boto3.client("eks", region_name=resources.region)
    cluster_metadata = eks_client.describe_cluster(name=resources.cluster)
    # NOTE(review): assumes EKS reports the version as "major.minor"
    # (e.g. "1.24") - confirm against the EKS API reference.
    cluster_version = cluster_metadata["cluster"]["version"]

    deployments = client.AppsV1Api().list_deployment_for_all_namespaces().items

    for deployment in deployments:
        if deployment.metadata.name != "cluster-autoscaler":
            continue

        ca_image = deployment.spec.template.spec.containers[0].image
        # Exact comparison: a substring test would accept e.g. tag
        # "v1.21.24" for cluster version "1.24".
        if _image_minor_version(ca_image) == cluster_version:
            return True

        ca_image_version = ca_image.split(":")[-1]
        console.print(
            Panel(
                f"[red]CA({ca_image_version})-k8s({cluster_version}) Cross version compatibility is not recommended.",
                subtitle="[link=https://aws.github.io/aws-eks-best-practices/cluster-autoscaling/#operating-the-cluster-autoscaler]Click to see the guide[/link]",
            )
        )
        console.print()
        return False

    # No cluster-autoscaler deployment: there is nothing to compare.
    return True


def ensure_cluster_autoscaler_has_autodiscovery_mode(resources: Resources):
    """Check that Cluster Autoscaler runs with node group auto-discovery.

    Inspects the first container of the first Deployment named
    ``cluster-autoscaler`` (any namespace) and prints a warning panel when
    none of its ``command`` or ``args`` entries contains
    ``node-group-auto-discover``.

    Args:
        resources: Cluster context passed to every rule; not read here.

    Returns:
        ``False`` when the flag is missing, ``True`` when it is present or
        when no ``cluster-autoscaler`` deployment exists.
    """
    deployments = client.AppsV1Api().list_deployment_for_all_namespaces().items

    for deployment in deployments:
        if deployment.metadata.name != "cluster-autoscaler":
            continue

        container = deployment.spec.template.spec.containers[0]
        # Flags can be given through either field, and each is optional
        # (``None`` when unset), so merge them before searching.
        flags = list(container.command or []) + list(container.args or [])
        if any("node-group-auto-discover" in flag for flag in flags):
            return True

        console.print(
            Panel(
                "[red]Auto discovery is not enabled for Cluster Autoscaler",
                subtitle="[link=https://aws.github.io/aws-eks-best-practices/cluster-autoscaling/#operating-the-cluster-autoscaler]Click to see the guide[/link]",
            )
        )
        console.print()
        return False

    return True


def use_separate_iam_role_for_cluster_autoscaler(resources: Resources):
    """Check that Cluster Autoscaler uses its own IAM role through IRSA.

    Reads the ServiceAccount used by the first Deployment named
    ``cluster-autoscaler`` and prints a warning panel when it lacks the
    ``eks.amazonaws.com/role-arn`` annotation.

    Args:
        resources: Cluster context passed to every rule; not read here.

    Returns:
        ``False`` when the annotation is missing, ``True`` when it is present
        or when no ``cluster-autoscaler`` deployment exists.
    """
    deployments = client.AppsV1Api().list_deployment_for_all_namespaces().items

    for deployment in deployments:
        if deployment.metadata.name != "cluster-autoscaler":
            continue

        # A pod template without an explicit service account runs as
        # "default".
        service_account = (
            deployment.spec.template.spec.service_account_name or "default"
        )
        # The ServiceAccount lives in the deployment's own namespace, which
        # is not necessarily kube-system.
        namespace = deployment.metadata.namespace or "kube-system"
        sa_data = client.CoreV1Api().read_namespaced_service_account(
            service_account, namespace, pretty="true"
        )
        # ``annotations`` is None when the ServiceAccount has none.
        annotations = sa_data.metadata.annotations or {}
        if "eks.amazonaws.com/role-arn" in annotations:
            return True

        console.print(
            Panel(
                "[red]Cluster-autoscaler deployment does not use a dedicated IAM Role (IRSA)",
                subtitle="[link=https://aws.github.io/aws-eks-best-practices/cluster-autoscaling/#employ-least-privileged-access-to-the-iam-role]Click to see the guide[/link]",
            )
        )
        console.print()
        return False

    return True


def employ_least_privileged_access_cluster_autoscaler_role(
    resources: Resources,
):
    """Check that the Cluster Autoscaler IAM role grants only needed actions.

    Resolves the IAM role from the ``eks.amazonaws.com/role-arn`` annotation
    on the ServiceAccount of the first Deployment named
    ``cluster-autoscaler``, gathers the actions of all its policies and
    prints a table of every action outside the allow-list below.

    Args:
        resources: Cluster context; ``region`` is read.

    Returns:
        ``True`` when the role lists no action outside the allow-list.
        ``False`` when it does, when the ServiceAccount carries no role
        annotation, or when no ``cluster-autoscaler`` deployment exists.
    """
    deployments = client.AppsV1Api().list_deployment_for_all_namespaces().items

    iam_client = boto3.client("iam", region_name=resources.region)

    ACTIONS = {
        "autoscaling:DescribeAutoScalingGroups",
        "autoscaling:DescribeAutoScalingInstances",
        "autoscaling:DescribeLaunchConfigurations",
        "autoscaling:DescribeScalingActivities",
        "autoscaling:DescribeTags",
        "ec2:DescribeImages",
        "ec2:DescribeInstanceTypes",
        "ec2:DescribeLaunchTemplateVersions",
        "ec2:GetInstanceTypesFromInstanceRequirements",
        "eks:DescribeNodegroup",
        "autoscaling:SetDesiredCapacity",
        "autoscaling:TerminateInstanceInAutoScalingGroup",
    }
    # IAM treats action names case-insensitively, so compare lowercased.
    allowed = {action.lower() for action in ACTIONS}

    for deployment in deployments:
        if deployment.metadata.name != "cluster-autoscaler":
            continue

        # A pod template without an explicit service account runs as
        # "default".
        service_account = (
            deployment.spec.template.spec.service_account_name or "default"
        )
        # The ServiceAccount lives in the deployment's own namespace, which
        # is not necessarily kube-system.
        namespace = deployment.metadata.namespace or "kube-system"
        sa_data = client.CoreV1Api().read_namespaced_service_account(
            service_account, namespace, pretty="true"
        )
        # ``annotations`` is None when the ServiceAccount has none.
        annotations = sa_data.metadata.annotations or {}
        sa_iam_role_arn = annotations.get("eks.amazonaws.com/role-arn")
        if not sa_iam_role_arn:
            # No IRSA role to inspect; reported as a failure below.
            break

        # The role name is the last path segment of the ARN.
        sa_iam_role = sa_iam_role_arn.split("/")[-1]
        actions = _get_policy_documents_for_role(sa_iam_role, iam_client)

        unnecessary_actions = {
            action for action in actions if action.lower() not in allowed
        }
        if unnecessary_actions:
            print_role_action_table(
                unnecessary_actions,
                "[red]Cluster autoscaler role has unnecessary actions assigned.",
                "[link=https://aws.github.io/aws-eks-best-practices/cluster-autoscaling/#employ-least-privileged-access-to-the-iam-role]Click to see the guide[/link]",
            )
            return False
        return True

    return False


def use_managed_nodegroups(resources: Resources):
    """Report nodes that are not part of a managed node group.

    A node is accepted when it carries the ``eks.amazonaws.com/nodegroup``
    label, or when it carries ``karpenter.sh/provisioner-name`` without
    ``alpha.eksctl.io/nodegroup-name``. Every other node is an offender.
    Offenders, if any, are printed as a table.

    Args:
        resources: Cluster context passed to every rule; not read here.

    Returns:
        The list of offending node objects (empty when all nodes comply).
    """
    offenders = []
    nodes = client.CoreV1Api().list_node().items

    for node in nodes:
        # ``labels`` is None for a node without any label.
        labels = node.metadata.labels or {}
        if "eks.amazonaws.com/nodegroup" in labels:
            continue
        # The eksctl label takes precedence over the Karpenter one, matching
        # the order in which the labels have always been evaluated.
        if (
            "alpha.eksctl.io/nodegroup-name" in labels
            or "karpenter.sh/provisioner-name" not in labels
        ):
            offenders.append(node)

    if offenders:
        print_node_table(
            offenders,
            "[red]Following nodes are not part of a managed node group.",
            "[link=https://aws.github.io/aws-eks-best-practices/cluster-autoscaling/#configuring-your-node-groups]Click to see the guide[/link]",
        )
    return offenders
17 changes: 8 additions & 9 deletions hardeneks/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ rules:
applications:
- check_metrics_server_is_running
- check_vertical_pod_autoscaler_exists
cluster_autoscaling:
cluster_autoscaler:
- check_any_cluster_autoscaler_exists
- ensure_cluster_autoscaler_and_cluster_versions_match
- ensure_cluster_autoscaler_has_autodiscovery_mode
- use_separate_iam_role_for_cluster_autoscaler
- employ_least_privileged_access_cluster_autoscaler_role
- use_managed_nodegroups
namespace_based:
security:
iam:
Expand Down Expand Up @@ -71,13 +79,4 @@ rules:
- schedule_replicas_across_nodes
- run_multiple_replicas
- avoid_running_singleton_pods
# data_plane:
# networking:
# vpc_subnets:
# vpc_cni:
# prefix_mode:
# ipv6:
# security_groups:
# custom_networking:
# load_balancing:

24 changes: 23 additions & 1 deletion hardeneks/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,18 @@ def print_instance_public_table(instances, message, docs):
str(instance["Instances"][0]["PublicDnsName"]),
)

console.print(Panel(table, title=message))
console.print(Panel(table, title=message, subtitle=docs))
console.print()


def print_node_table(nodes, message, docs):
    """Print a panel listing the names of the given nodes.

    Args:
        nodes: Iterable of node objects; ``metadata.name`` is read from each.
        message: Text used as the panel title.
        docs: Text used as the panel subtitle.
    """
    node_table = Table()
    node_table.add_column("NodeName", style="cyan")

    for entry in nodes:
        node_name = entry.metadata.name
        node_table.add_row(node_name)

    panel = Panel(node_table, title=message, subtitle=docs)
    console.print(panel)
    # Blank line separating this panel from whatever is printed next.
    console.print()


Expand Down Expand Up @@ -168,3 +179,14 @@ def print_persistent_volume_table(persistent_volumes, message, docs):

console.print(Panel(table, title=message, subtitle=docs))
console.print()


def print_role_action_table(actions, message, docs):
    """Print a panel listing the given IAM actions, one per row.

    Args:
        actions: Iterable of action strings, shown in iteration order.
        message: Text used as the panel title.
        docs: Text used as the panel subtitle.
    """
    action_table = Table()
    action_table.add_column("Action", style="cyan")

    for entry in actions:
        action_table.add_row(entry)

    panel = Panel(action_table, title=message, subtitle=docs)
    console.print(panel)
    # Blank line separating this panel from whatever is printed next.
    console.print()
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "hardeneks"
version = "0.7.2"
version = "0.8.0"
description = ""
authors = ["Doruk Ozturk <[email protected]>"]
readme = "README.md"
Expand Down Expand Up @@ -41,7 +41,7 @@ exclude = '''

[tool.commitizen]
name = "cz_conventional_commits"
version = "0.7.2"
version = "0.8.0"
version_files = [
"pyproject.toml:[tool.commitizen]\nversion",
"pyproject.toml:[tool.poetry]\nname = \"commitizen\"\nversion",
Expand Down
Loading

0 comments on commit 887ef5f

Please sign in to comment.