Skip to content

Commit 64743e6

Browse files
committed
Merge tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 cache resource control updates from Borislav Petkov:

 - Misc cleanups to the resctrl code in preparation for the ARM side
   (James Morse)

 - Add support for controlling per-thread memory bandwidth throttling
   delay values on hw which supports it (Fenghua Yu)

* tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Enable user to view thread or core throttling mode
  x86/resctrl: Enumerate per-thread MBA controls
  cacheinfo: Move resctrl's get_cache_id() to the cacheinfo header file
  x86/resctrl: Add struct rdt_cache::arch_has_{sparse, empty}_bitmaps
  x86/resctrl: Merge AMD/Intel parse_bw() calls
  x86/resctrl: Add struct rdt_membw::arch_needs_linear to explain AMD/Intel MBA difference
  x86/resctrl: Use is_closid_match() in more places
  x86/resctrl: Include pid.h
  x86/resctrl: Use container_of() in delayed_work handlers
  x86/resctrl: Fix stale comment
  x86/resctrl: Remove struct rdt_membw::max_delay
  x86/resctrl: Remove unused struct mbm_state::chunks_bw
2 parents f94ab23 + 29b6bd4 commit 64743e6

11 files changed

Lines changed: 185 additions & 157 deletions

File tree

Documentation/x86/resctrl_ui.rst

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,18 @@ with respect to allocation:
138138
non-linear. This field is purely informational
139139
only.
140140

141+
"thread_throttle_mode":
142+
Indicator on Intel systems of how tasks running on threads
143+
of a physical core are throttled in cases where they
144+
request different memory bandwidth percentages:
145+
146+
"max":
147+
the smallest percentage is applied
148+
to all threads
149+
"per-thread":
150+
bandwidth percentages are directly applied to
151+
the threads running on the core
152+
141153
If RDT monitoring is available there will be an "L3_MON" directory
142154
with the following files:
143155

@@ -364,8 +376,10 @@ to the next control step available on the hardware.
364376

365377
The bandwidth throttling is a core specific mechanism on some of Intel
366378
SKUs. Using a high bandwidth and a low bandwidth setting on two threads
367-
sharing a core will result in both threads being throttled to use the
368-
low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core
379+
sharing a core may result in both threads being throttled to use the
380+
low bandwidth (see "thread_throttle_mode").
381+
382+
The fact that Memory bandwidth allocation(MBA) may be a core
369383
specific mechanism where as memory bandwidth monitoring(MBM) is done at
370384
the package level may lead to confusion when users try to apply control
371385
via the MBA and then monitor the bandwidth to see if the controls are

arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@
288288
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
289289
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
290290
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
291+
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
291292

292293
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
293294
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */

arch/x86/kernel/cpu/cpuid-deps.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ static const struct cpuid_dep cpuid_deps[] = {
7070
{ X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
7171
{ X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
7272
{ X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES },
73+
{ X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA },
7374
{}
7475
};
7576

arch/x86/kernel/cpu/resctrl/core.c

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = {
168168
.name = "MB",
169169
.domains = domain_init(RDT_RESOURCE_MBA),
170170
.cache_level = 3,
171+
.parse_ctrlval = parse_bw,
171172
.format_str = "%d=%*u",
172173
.fflags = RFTYPE_RES_MB,
173174
},
@@ -254,22 +255,30 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
254255
{
255256
union cpuid_0x10_3_eax eax;
256257
union cpuid_0x10_x_edx edx;
257-
u32 ebx, ecx;
258+
u32 ebx, ecx, max_delay;
258259

259260
cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
260261
r->num_closid = edx.split.cos_max + 1;
261-
r->membw.max_delay = eax.split.max_delay + 1;
262+
max_delay = eax.split.max_delay + 1;
262263
r->default_ctrl = MAX_MBA_BW;
264+
r->membw.arch_needs_linear = true;
263265
if (ecx & MBA_IS_LINEAR) {
264266
r->membw.delay_linear = true;
265-
r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
266-
r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
267+
r->membw.min_bw = MAX_MBA_BW - max_delay;
268+
r->membw.bw_gran = MAX_MBA_BW - max_delay;
267269
} else {
268270
if (!rdt_get_mb_table(r))
269271
return false;
272+
r->membw.arch_needs_linear = false;
270273
}
271274
r->data_width = 3;
272275

276+
if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
277+
r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
278+
else
279+
r->membw.throttle_mode = THREAD_THROTTLE_MAX;
280+
thread_throttle_mode_init();
281+
273282
r->alloc_capable = true;
274283
r->alloc_enabled = true;
275284

@@ -288,7 +297,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
288297

289298
/* AMD does not use delay */
290299
r->membw.delay_linear = false;
300+
r->membw.arch_needs_linear = false;
291301

302+
/*
303+
* AMD does not use memory delay throttle model to control
304+
* the allocation like Intel does.
305+
*/
306+
r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
292307
r->membw.min_bw = 0;
293308
r->membw.bw_gran = 1;
294309
/* Max value is 2048, Data width should be 4 in decimal */
@@ -346,19 +361,6 @@ static void rdt_get_cdp_l2_config(void)
346361
rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
347362
}
348363

349-
static int get_cache_id(int cpu, int level)
350-
{
351-
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
352-
int i;
353-
354-
for (i = 0; i < ci->num_leaves; i++) {
355-
if (ci->info_list[i].level == level)
356-
return ci->info_list[i].id;
357-
}
358-
359-
return -1;
360-
}
361-
362364
static void
363365
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
364366
{
@@ -556,7 +558,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
556558
*/
557559
static void domain_add_cpu(int cpu, struct rdt_resource *r)
558560
{
559-
int id = get_cache_id(cpu, r->cache_level);
561+
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
560562
struct list_head *add_pos = NULL;
561563
struct rdt_domain *d;
562564

@@ -602,7 +604,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
602604

603605
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
604606
{
605-
int id = get_cache_id(cpu, r->cache_level);
607+
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
606608
struct rdt_domain *d;
607609

608610
d = rdt_find_domain(r, id, NULL);
@@ -918,12 +920,12 @@ static __init void rdt_init_res_defs_intel(void)
918920
r->rid == RDT_RESOURCE_L3CODE ||
919921
r->rid == RDT_RESOURCE_L2 ||
920922
r->rid == RDT_RESOURCE_L2DATA ||
921-
r->rid == RDT_RESOURCE_L2CODE)
922-
r->cbm_validate = cbm_validate_intel;
923-
else if (r->rid == RDT_RESOURCE_MBA) {
923+
r->rid == RDT_RESOURCE_L2CODE) {
924+
r->cache.arch_has_sparse_bitmaps = false;
925+
r->cache.arch_has_empty_bitmaps = false;
926+
} else if (r->rid == RDT_RESOURCE_MBA) {
924927
r->msr_base = MSR_IA32_MBA_THRTL_BASE;
925928
r->msr_update = mba_wrmsr_intel;
926-
r->parse_ctrlval = parse_bw_intel;
927929
}
928930
}
929931
}
@@ -938,12 +940,12 @@ static __init void rdt_init_res_defs_amd(void)
938940
r->rid == RDT_RESOURCE_L3CODE ||
939941
r->rid == RDT_RESOURCE_L2 ||
940942
r->rid == RDT_RESOURCE_L2DATA ||
941-
r->rid == RDT_RESOURCE_L2CODE)
942-
r->cbm_validate = cbm_validate_amd;
943-
else if (r->rid == RDT_RESOURCE_MBA) {
943+
r->rid == RDT_RESOURCE_L2CODE) {
944+
r->cache.arch_has_sparse_bitmaps = true;
945+
r->cache.arch_has_empty_bitmaps = true;
946+
} else if (r->rid == RDT_RESOURCE_MBA) {
944947
r->msr_base = MSR_IA32_MBA_BW_BASE;
945948
r->msr_update = mba_wrmsr_amd;
946-
r->parse_ctrlval = parse_bw_amd;
947949
}
948950
}
949951
}

arch/x86/kernel/cpu/resctrl/ctrlmondata.c

Lines changed: 13 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -21,53 +21,6 @@
2121
#include <linux/slab.h>
2222
#include "internal.h"
2323

24-
/*
25-
* Check whether MBA bandwidth percentage value is correct. The value is
26-
* checked against the minimum and maximum bandwidth values specified by
27-
* the hardware. The allocated bandwidth percentage is rounded to the next
28-
* control step available on the hardware.
29-
*/
30-
static bool bw_validate_amd(char *buf, unsigned long *data,
31-
struct rdt_resource *r)
32-
{
33-
unsigned long bw;
34-
int ret;
35-
36-
ret = kstrtoul(buf, 10, &bw);
37-
if (ret) {
38-
rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
39-
return false;
40-
}
41-
42-
if (bw < r->membw.min_bw || bw > r->default_ctrl) {
43-
rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
44-
r->membw.min_bw, r->default_ctrl);
45-
return false;
46-
}
47-
48-
*data = roundup(bw, (unsigned long)r->membw.bw_gran);
49-
return true;
50-
}
51-
52-
int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
53-
struct rdt_domain *d)
54-
{
55-
unsigned long bw_val;
56-
57-
if (d->have_new_ctrl) {
58-
rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
59-
return -EINVAL;
60-
}
61-
62-
if (!bw_validate_amd(data->buf, &bw_val, r))
63-
return -EINVAL;
64-
65-
d->new_ctrl = bw_val;
66-
d->have_new_ctrl = true;
67-
68-
return 0;
69-
}
70-
7124
/*
7225
* Check whether MBA bandwidth percentage value is correct. The value is
7326
* checked against the minimum and max bandwidth values specified by the
@@ -82,7 +35,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
8235
/*
8336
* Only linear delay values is supported for current Intel SKUs.
8437
*/
85-
if (!r->membw.delay_linear) {
38+
if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
8639
rdt_last_cmd_puts("No support for non-linear MB domains\n");
8740
return false;
8841
}
@@ -104,8 +57,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
10457
return true;
10558
}
10659

107-
int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
108-
struct rdt_domain *d)
60+
int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
61+
struct rdt_domain *d)
10962
{
11063
unsigned long bw_val;
11164

@@ -123,12 +76,14 @@ int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
12376
}
12477

12578
/*
126-
* Check whether a cache bit mask is valid. The SDM says:
79+
* Check whether a cache bit mask is valid.
80+
* For Intel the SDM says:
12781
* Please note that all (and only) contiguous '1' combinations
12882
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
12983
* Additionally Haswell requires at least two bits set.
84+
* AMD allows non-contiguous bitmasks.
13085
*/
131-
bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
86+
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
13287
{
13388
unsigned long first_bit, zero_bit, val;
13489
unsigned int cbm_len = r->cache.cbm_len;
@@ -140,15 +95,18 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
14095
return false;
14196
}
14297

143-
if (val == 0 || val > r->default_ctrl) {
98+
if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
99+
val > r->default_ctrl) {
144100
rdt_last_cmd_puts("Mask out of range\n");
145101
return false;
146102
}
147103

148104
first_bit = find_first_bit(&val, cbm_len);
149105
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
150106

151-
if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
107+
/* Are non-contiguous bitmaps allowed? */
108+
if (!r->cache.arch_has_sparse_bitmaps &&
109+
(find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
152110
rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
153111
return false;
154112
}
@@ -163,30 +121,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
163121
return true;
164122
}
165123

166-
/*
167-
* Check whether a cache bit mask is valid. AMD allows non-contiguous
168-
* bitmasks
169-
*/
170-
bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
171-
{
172-
unsigned long val;
173-
int ret;
174-
175-
ret = kstrtoul(buf, 16, &val);
176-
if (ret) {
177-
rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
178-
return false;
179-
}
180-
181-
if (val > r->default_ctrl) {
182-
rdt_last_cmd_puts("Mask out of range\n");
183-
return false;
184-
}
185-
186-
*data = val;
187-
return true;
188-
}
189-
190124
/*
191125
* Read one cache bit mask (hex). Check that it is valid for the current
192126
* resource type.
@@ -212,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
212146
return -EINVAL;
213147
}
214148

215-
if (!r->cbm_validate(data->buf, &cbm_val, r))
149+
if (!cbm_validate(data->buf, &cbm_val, r))
216150
return -EINVAL;
217151

218152
if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||

0 commit comments

Comments
 (0)