Skip to content

Commit

Permalink
Merge tag 'x86_cache_for_v5.11' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/tip/tip

Pull x86 cache resource control updates from Borislav Petkov:

 - add logic to correct MBM total and local values fixing errata SKX99
   and BDF102 (Fenghua Yu)

 - cleanups

* tag 'x86_cache_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Clean up unused function parameter in rmdir path
  x86/resctrl: Constify kernfs_ops
  x86/resctrl: Correct MBM total and local values
  Documentation/x86: Rename resctrl_ui.rst and add two errata to the file
  • Loading branch information
torvalds committed Dec 14, 2020
2 parents 405f868 + 19eb86a commit 8ba27ae
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Documentation/x86/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ x86-specific Documentation
pti
mds
microcode
resctrl_ui
resctrl
tsx_async_abort
usb-legacy-support
i386/index
Expand Down
93 changes: 93 additions & 0 deletions Documentation/x86/resctrl_ui.rst → Documentation/x86/resctrl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1209,3 +1209,96 @@ View the llc occupancy snapshot::

# cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
11234000

Intel RDT Errata
================

Intel MBM Counters May Report System Memory Bandwidth Incorrectly
-----------------------------------------------------------------

Errata SKX99 for Skylake server and BDF102 for Broadwell server.

Problem: Intel Memory Bandwidth Monitoring (MBM) counters track metrics
according to the assigned Resource Monitor ID (RMID) for that logical
core. The IA32_QM_CTR register (MSR 0xC8E), used to report these
metrics, may report incorrect system bandwidth for certain RMID values.

Implication: Due to the errata, system memory bandwidth may not match
what is reported.

Workaround: MBM total and local readings are corrected according to the
following correction factor table:

+---------------+---------------+---------------+-----------------+
|core count |rmid count |rmid threshold |correction factor|
+---------------+---------------+---------------+-----------------+
|1 |8 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|2 |16 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|3 |24 |15 |0.969650 |
+---------------+---------------+---------------+-----------------+
|4 |32 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|6 |48 |31 |0.969650 |
+---------------+---------------+---------------+-----------------+
|7 |56 |47 |1.142857 |
+---------------+---------------+---------------+-----------------+
|8 |64 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|9 |72 |63 |1.185115 |
+---------------+---------------+---------------+-----------------+
|10 |80 |63 |1.066553 |
+---------------+---------------+---------------+-----------------+
|11 |88 |79 |1.454545 |
+---------------+---------------+---------------+-----------------+
|12 |96 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|13 |104 |95 |1.230769 |
+---------------+---------------+---------------+-----------------+
|14 |112 |95 |1.142857 |
+---------------+---------------+---------------+-----------------+
|15 |120 |95 |1.066667 |
+---------------+---------------+---------------+-----------------+
|16 |128 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|17 |136 |127 |1.254863 |
+---------------+---------------+---------------+-----------------+
|18 |144 |127 |1.185255 |
+---------------+---------------+---------------+-----------------+
|19 |152 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|20 |160 |127 |1.066667 |
+---------------+---------------+---------------+-----------------+
|21 |168 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|22 |176 |159 |1.454334 |
+---------------+---------------+---------------+-----------------+
|23 |184 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|24 |192 |127 |0.969744 |
+---------------+---------------+---------------+-----------------+
|25 |200 |191 |1.280246 |
+---------------+---------------+---------------+-----------------+
|26 |208 |191 |1.230921 |
+---------------+---------------+---------------+-----------------+
|27 |216 |0 |1.000000 |
+---------------+---------------+---------------+-----------------+
|28 |224 |191 |1.143118 |
+---------------+---------------+---------------+-----------------+

If rmid > rmid threshold, MBM total and local values should be multiplied
by the correction factor.

See:

1. Erratum SKX99 in Intel Xeon Processor Scalable Family Specification Update:
http://web.archive.org/web/20200716124958/https://www.intel.com/content/www/us/en/processors/xeon/scalable/xeon-scalable-spec-update.html

2. Erratum BDF102 in Intel Xeon E5-2600 v4 Processor Product Family Specification Update:
http://web.archive.org/web/20191125200531/https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-v4-spec-update.pdf

3. The errata in Intel Resource Director Technology (Intel RDT) on 2nd Generation Intel Xeon Scalable Processors Reference Manual:
https://software.intel.com/content/www/us/en/develop/articles/intel-resource-director-technology-rdt-reference-manual.html

for further information.
4 changes: 4 additions & 0 deletions arch/x86/kernel/cpu/resctrl/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,10 @@ static __init void __check_quirks_intel(void)
set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
else
set_rdt_options("!l3cat");
fallthrough;
case INTEL_FAM6_BROADWELL_X:
intel_rdt_mbm_apply_quirk();
break;
}
}

Expand Down
3 changes: 2 additions & 1 deletion arch/x86/kernel/cpu/resctrl/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ void __exit rdtgroup_exit(void);
struct rftype {
char *name;
umode_t mode;
struct kernfs_ops *kf_ops;
const struct kernfs_ops *kf_ops;
unsigned long flags;
unsigned long fflags;

Expand Down Expand Up @@ -619,6 +619,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
Expand Down
82 changes: 80 additions & 2 deletions arch/x86/kernel/cpu/resctrl/monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,69 @@ unsigned int rdt_mon_features;
*/
unsigned int resctrl_cqm_threshold;

#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))

/*
* The correction factor table is documented in Documentation/x86/resctrl.rst.
* If rmid > rmid threshold, MBM total and local values should be multiplied
* by the correction factor.
*
* The original table is modified for better code:
*
* 1. The threshold 0 is changed to rmid count - 1 so don't do correction
* for the case.
* 2. MBM total and local correction table indexed by core counter which is
* equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27.
* 3. The correction factor is normalized to 2^20 (1048576) so it's faster
* to calculate corrected value by shifting:
* corrected_value = (original_value * correction_factor) >> 20
*/
static const struct mbm_correction_factor_table {
u32 rmidthreshold;
u64 cf;
} mbm_cf_table[] __initdata = {
{7, CF(1.000000)},
{15, CF(1.000000)},
{15, CF(0.969650)},
{31, CF(1.000000)},
{31, CF(1.066667)},
{31, CF(0.969650)},
{47, CF(1.142857)},
{63, CF(1.000000)},
{63, CF(1.185115)},
{63, CF(1.066553)},
{79, CF(1.454545)},
{95, CF(1.000000)},
{95, CF(1.230769)},
{95, CF(1.142857)},
{95, CF(1.066667)},
{127, CF(1.000000)},
{127, CF(1.254863)},
{127, CF(1.185255)},
{151, CF(1.000000)},
{127, CF(1.066667)},
{167, CF(1.000000)},
{159, CF(1.454334)},
{183, CF(1.000000)},
{127, CF(0.969744)},
{191, CF(1.280246)},
{191, CF(1.230921)},
{215, CF(1.000000)},
{191, CF(1.143118)},
};

static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
static u64 mbm_cf __read_mostly;

static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
{
/* Correct MBM value. */
if (rmid > mbm_cf_rmidthreshold)
val = (val * mbm_cf) >> 20;

return val;
}

static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
struct rmid_entry *entry;
Expand Down Expand Up @@ -260,7 +323,8 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
m->chunks += chunks;
m->prev_msr = tval;

rr->val += m->chunks;
rr->val += get_corrected_mbm_count(rmid, m->chunks);

return 0;
}

Expand All @@ -279,7 +343,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
return;

chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
cur_bw = (chunks * r->mon_scale) >> 20;
cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;

if (m->delta_comp)
m->delta_bw = abs(cur_bw - m->prev_bw);
Expand Down Expand Up @@ -642,3 +706,17 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)

return 0;
}

void __init intel_rdt_mbm_apply_quirk(void)
{
int cf_index;

cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;
if (cf_index >= ARRAY_SIZE(mbm_cf_table)) {
pr_info("No MBM correction factor available\n");
return;
}

mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
mbm_cf = mbm_cf_table[cf_index].cf;
}
21 changes: 9 additions & 12 deletions arch/x86/kernel/cpu/resctrl/rdtgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,13 @@ static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
return -EINVAL;
}

static struct kernfs_ops rdtgroup_kf_single_ops = {
static const struct kernfs_ops rdtgroup_kf_single_ops = {
.atomic_write_len = PAGE_SIZE,
.write = rdtgroup_file_write,
.seq_show = rdtgroup_seqfile_show,
};

static struct kernfs_ops kf_mondata_ops = {
static const struct kernfs_ops kf_mondata_ops = {
.atomic_write_len = PAGE_SIZE,
.seq_show = rdtgroup_mondata_show,
};
Expand Down Expand Up @@ -3023,8 +3023,7 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
return -EPERM;
}

static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
cpumask_var_t tmpmask)
static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
int cpu;
Expand Down Expand Up @@ -3056,8 +3055,7 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
return 0;
}

static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
struct rdtgroup *rdtgrp)
static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
{
rdtgrp->flags = RDT_DELETED;
list_del(&rdtgrp->rdtgroup_list);
Expand All @@ -3066,8 +3064,7 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
return 0;
}

static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
cpumask_var_t tmpmask)
static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
{
int cpu;

Expand All @@ -3094,7 +3091,7 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);

rdtgroup_ctrl_remove(kn, rdtgrp);
rdtgroup_ctrl_remove(rdtgrp);

/*
* Free all the child monitor group rmids.
Expand Down Expand Up @@ -3131,13 +3128,13 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
rdtgrp != &rdtgroup_default) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
ret = rdtgroup_ctrl_remove(kn, rdtgrp);
ret = rdtgroup_ctrl_remove(rdtgrp);
} else {
ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
}
} else if (rdtgrp->type == RDTMON_GROUP &&
is_mon_groups(parent_kn, kn->name)) {
ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
} else {
ret = -EPERM;
}
Expand Down

0 comments on commit 8ba27ae

Please sign in to comment.