Skip to content

Commit

Permalink
Merge tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/tip/tip

Pull x86 cache resource control updates from Borislav Petkov:

 - Misc cleanups to the resctrl code in preparation for the ARM side
   (James Morse)

 - Add support for controlling per-thread memory bandwidth throttling
   delay values on hw which supports it (Fenghua Yu)

* tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Enable user to view thread or core throttling mode
  x86/resctrl: Enumerate per-thread MBA controls
  cacheinfo: Move resctrl's get_cache_id() to the cacheinfo header file
  x86/resctrl: Add struct rdt_cache::arch_has_{sparse, empty}_bitmaps
  x86/resctrl: Merge AMD/Intel parse_bw() calls
  x86/resctrl: Add struct rdt_membw::arch_needs_linear to explain AMD/Intel MBA difference
  x86/resctrl: Use is_closid_match() in more places
  x86/resctrl: Include pid.h
  x86/resctrl: Use container_of() in delayed_work handlers
  x86/resctrl: Fix stale comment
  x86/resctrl: Remove struct rdt_membw::max_delay
  x86/resctrl: Remove unused struct mbm_state::chunks_bw
  • Loading branch information
torvalds committed Oct 12, 2020
2 parents f94ab23 + 29b6bd4 commit 64743e6
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 157 deletions.
18 changes: 16 additions & 2 deletions Documentation/x86/resctrl_ui.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,18 @@ with respect to allocation:
non-linear. This field is purely informational
only.

"thread_throttle_mode":
Indicator on Intel systems of how tasks running on threads
of a physical core are throttled in cases where they
request different memory bandwidth percentages:

"max":
the smallest percentage is applied
to all threads
"per-thread":
bandwidth percentages are directly applied to
the threads running on the core

If RDT monitoring is available there will be an "L3_MON" directory
with the following files:

Expand Down Expand Up @@ -364,8 +376,10 @@ to the next control step available on the hardware.

The bandwidth throttling is a core specific mechanism on some of Intel
SKUs. Using a high bandwidth and a low bandwidth setting on two threads
sharing a core will result in both threads being throttled to use the
low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core
sharing a core may result in both threads being throttled to use the
low bandwidth (see "thread_throttle_mode").

The fact that Memory bandwidth allocation(MBA) may be a core
specific mechanism where as memory bandwidth monitoring(MBM) is done at
the package level may lead to confusion when users try to apply control
via the MBA and then monitor the bandwidth to see if the controls are
Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/asm/cpufeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/cpu/cpuid-deps.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
{ X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
{ X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES },
{ X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA },
{}
};

Expand Down
56 changes: 29 additions & 27 deletions arch/x86/kernel/cpu/resctrl/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "MB",
.domains = domain_init(RDT_RESOURCE_MBA),
.cache_level = 3,
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
Expand Down Expand Up @@ -254,22 +255,30 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
{
union cpuid_0x10_3_eax eax;
union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
u32 ebx, ecx, max_delay;

cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
r->num_closid = edx.split.cos_max + 1;
r->membw.max_delay = eax.split.max_delay + 1;
max_delay = eax.split.max_delay + 1;
r->default_ctrl = MAX_MBA_BW;
r->membw.arch_needs_linear = true;
if (ecx & MBA_IS_LINEAR) {
r->membw.delay_linear = true;
r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
r->membw.min_bw = MAX_MBA_BW - max_delay;
r->membw.bw_gran = MAX_MBA_BW - max_delay;
} else {
if (!rdt_get_mb_table(r))
return false;
r->membw.arch_needs_linear = false;
}
r->data_width = 3;

if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
else
r->membw.throttle_mode = THREAD_THROTTLE_MAX;
thread_throttle_mode_init();

r->alloc_capable = true;
r->alloc_enabled = true;

Expand All @@ -288,7 +297,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)

/* AMD does not use delay */
r->membw.delay_linear = false;
r->membw.arch_needs_linear = false;

/*
* AMD does not use memory delay throttle model to control
* the allocation like Intel does.
*/
r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
r->membw.min_bw = 0;
r->membw.bw_gran = 1;
/* Max value is 2048, Data width should be 4 in decimal */
Expand Down Expand Up @@ -346,19 +361,6 @@ static void rdt_get_cdp_l2_config(void)
rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
}

static int get_cache_id(int cpu, int level)
{
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
int i;

for (i = 0; i < ci->num_leaves; i++) {
if (ci->info_list[i].level == level)
return ci->info_list[i].id;
}

return -1;
}

static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
Expand Down Expand Up @@ -556,7 +558,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
*/
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
int id = get_cache_id(cpu, r->cache_level);
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
struct list_head *add_pos = NULL;
struct rdt_domain *d;

Expand Down Expand Up @@ -602,7 +604,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
int id = get_cache_id(cpu, r->cache_level);
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
struct rdt_domain *d;

d = rdt_find_domain(r, id, NULL);
Expand Down Expand Up @@ -918,12 +920,12 @@ static __init void rdt_init_res_defs_intel(void)
r->rid == RDT_RESOURCE_L3CODE ||
r->rid == RDT_RESOURCE_L2 ||
r->rid == RDT_RESOURCE_L2DATA ||
r->rid == RDT_RESOURCE_L2CODE)
r->cbm_validate = cbm_validate_intel;
else if (r->rid == RDT_RESOURCE_MBA) {
r->rid == RDT_RESOURCE_L2CODE) {
r->cache.arch_has_sparse_bitmaps = false;
r->cache.arch_has_empty_bitmaps = false;
} else if (r->rid == RDT_RESOURCE_MBA) {
r->msr_base = MSR_IA32_MBA_THRTL_BASE;
r->msr_update = mba_wrmsr_intel;
r->parse_ctrlval = parse_bw_intel;
}
}
}
Expand All @@ -938,12 +940,12 @@ static __init void rdt_init_res_defs_amd(void)
r->rid == RDT_RESOURCE_L3CODE ||
r->rid == RDT_RESOURCE_L2 ||
r->rid == RDT_RESOURCE_L2DATA ||
r->rid == RDT_RESOURCE_L2CODE)
r->cbm_validate = cbm_validate_amd;
else if (r->rid == RDT_RESOURCE_MBA) {
r->rid == RDT_RESOURCE_L2CODE) {
r->cache.arch_has_sparse_bitmaps = true;
r->cache.arch_has_empty_bitmaps = true;
} else if (r->rid == RDT_RESOURCE_MBA) {
r->msr_base = MSR_IA32_MBA_BW_BASE;
r->msr_update = mba_wrmsr_amd;
r->parse_ctrlval = parse_bw_amd;
}
}
}
Expand Down
92 changes: 13 additions & 79 deletions arch/x86/kernel/cpu/resctrl/ctrlmondata.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,53 +21,6 @@
#include <linux/slab.h>
#include "internal.h"

/*
* Check whether MBA bandwidth percentage value is correct. The value is
* checked against the minimum and maximum bandwidth values specified by
* the hardware. The allocated bandwidth percentage is rounded to the next
* control step available on the hardware.
*/
static bool bw_validate_amd(char *buf, unsigned long *data,
struct rdt_resource *r)
{
unsigned long bw;
int ret;

ret = kstrtoul(buf, 10, &bw);
if (ret) {
rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
return false;
}

if (bw < r->membw.min_bw || bw > r->default_ctrl) {
rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
r->membw.min_bw, r->default_ctrl);
return false;
}

*data = roundup(bw, (unsigned long)r->membw.bw_gran);
return true;
}

int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d)
{
unsigned long bw_val;

if (d->have_new_ctrl) {
rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
return -EINVAL;
}

if (!bw_validate_amd(data->buf, &bw_val, r))
return -EINVAL;

d->new_ctrl = bw_val;
d->have_new_ctrl = true;

return 0;
}

/*
* Check whether MBA bandwidth percentage value is correct. The value is
* checked against the minimum and max bandwidth values specified by the
Expand All @@ -82,7 +35,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
/*
* Only linear delay values is supported for current Intel SKUs.
*/
if (!r->membw.delay_linear) {
if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
rdt_last_cmd_puts("No support for non-linear MB domains\n");
return false;
}
Expand All @@ -104,8 +57,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}

int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d)
int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d)
{
unsigned long bw_val;

Expand All @@ -123,12 +76,14 @@ int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
}

/*
* Check whether a cache bit mask is valid. The SDM says:
* Check whether a cache bit mask is valid.
* For Intel the SDM says:
* Please note that all (and only) contiguous '1' combinations
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
* Additionally Haswell requires at least two bits set.
* AMD allows non-contiguous bitmasks.
*/
bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
unsigned long first_bit, zero_bit, val;
unsigned int cbm_len = r->cache.cbm_len;
Expand All @@ -140,15 +95,18 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
return false;
}

if (val == 0 || val > r->default_ctrl) {
if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
val > r->default_ctrl) {
rdt_last_cmd_puts("Mask out of range\n");
return false;
}

first_bit = find_first_bit(&val, cbm_len);
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
/* Are non-contiguous bitmaps allowed? */
if (!r->cache.arch_has_sparse_bitmaps &&
(find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
return false;
}
Expand All @@ -163,30 +121,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
return true;
}

/*
* Check whether a cache bit mask is valid. AMD allows non-contiguous
* bitmasks
*/
bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
{
unsigned long val;
int ret;

ret = kstrtoul(buf, 16, &val);
if (ret) {
rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
return false;
}

if (val > r->default_ctrl) {
rdt_last_cmd_puts("Mask out of range\n");
return false;
}

*data = val;
return true;
}

/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
Expand All @@ -212,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
return -EINVAL;
}

if (!r->cbm_validate(data->buf, &cbm_val, r))
if (!cbm_validate(data->buf, &cbm_val, r))
return -EINVAL;

if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
Expand Down
Loading

0 comments on commit 64743e6

Please sign in to comment.