
Commit 92a0610

Merge tag 'x86_cpu_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 cpu updates from Borislav Petkov:

 - Add support for hardware-enforced cache coherency on AMD which
   obviates the need to flush cachelines before changing the PTE
   encryption bit (Krish Sadhukhan)

 - Add Centaur initialization support for families >= 7 (Tony W Wang-oc)

 - Add a feature flag for, and expose the TSX suspend load tracking
   feature to KVM (Cathy Zhang)

 - Emulate SLDT and STR so that Windows programs don't crash on UMIP
   machines (Brendan Shanks and Ricardo Neri)

 - Use the new SERIALIZE insn on Intel hardware which supports it
   (Ricardo Neri)

 - Misc cleanups and fixes

* tag 'x86_cpu_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM: SVM: Don't flush cache if hardware enforces cache coherency across encryption domains
  x86/mm/pat: Don't flush cache if hardware enforces cache coherency across encryption domnains
  x86/cpu: Add hardware-enforced cache coherency as a CPUID feature
  x86/cpu/centaur: Add Centaur family >=7 CPUs initialization support
  x86/cpu/centaur: Replace two-condition switch-case with an if statement
  x86/kvm: Expose TSX Suspend Load Tracking feature
  x86/cpufeatures: Enumerate TSX suspend load address tracking instructions
  x86/umip: Add emulation/spoofing for SLDT and STR instructions
  x86/cpu: Fix typos and improve the comments in sync_core()
  x86/cpu: Use XGETBV and XSETBV mnemonics in fpu/internal.h
  x86/cpu: Use SERIALIZE in sync_core() when available
2 parents ca1b669 + e1ebb2b commit 92a0610

10 files changed

Lines changed: 76 additions & 49 deletions
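
As a quick orientation (not part of the commit), the new capabilities can be probed from userspace with CPUID. The sketch below assumes the leaf-7 EDX bit positions implied by the (18*32+14) and (18*32+16) feature-word offsets in cpufeatures.h, and uses the 0x8000001F EAX bit 10 position added to scattered.c in this series.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=7,ECX=0):EDX bits 14/16 -- assumed to match the
	 * (18*32+14) and (18*32+16) feature-word positions below.
	 */
	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
		printf("SERIALIZE: %s\n", (edx & (1u << 14)) ? "yes" : "no");
		printf("TSXLDTRK : %s\n", (edx & (1u << 16)) ? "yes" : "no");
	}

	/* CPUID 0x8000001F EAX bit 10 -- the position added to scattered.c. */
	if (__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
		printf("SME_COHERENT: %s\n", (eax & (1u << 10)) ? "yes" : "no");

	return 0;
}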


arch/x86/include/asm/cpufeatures.h

Lines changed: 2 additions & 1 deletion
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-/* free					( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT	( 3*32+17) /* "" AMD hardware-enforced cache coherency */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -368,6 +368,7 @@
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */

arch/x86/include/asm/fpu/internal.h

Lines changed: 2 additions & 5 deletions
@@ -602,9 +602,7 @@ static inline u64 xgetbv(u32 index)
 {
 	u32 eax, edx;
 
-	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (index));
+	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
 	return eax + ((u64)edx << 32);
 }
 
@@ -613,8 +611,7 @@ static inline void xsetbv(u32 index, u64 value)
 	u32 eax = value;
 	u32 edx = value >> 32;
 
-	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
-		     : : "a" (eax), "d" (edx), "c" (index));
+	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
 }
 
 #endif /* _ASM_X86_FPU_INTERNAL_H */
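
For context, a minimal userspace sketch (not part of this diff) using the same XGETBV mnemonic the kernel helper now emits, here reading XCR0 (index 0). It assumes OSXSAVE (CPUID.01H:ECX[27]) is already enabled, otherwise the instruction faults.

#include <stdio.h>

/* Same mnemonic as the kernel helper above; index 0 selects XCR0. */
static unsigned long long read_xcr0(void)
{
	unsigned int eax, edx;

	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
	return eax | ((unsigned long long)edx << 32);
}

int main(void)
{
	unsigned long long xcr0 = read_xcr0();

	/* Bit 1 = SSE state, bit 2 = AVX state. */
	printf("XCR0 = %#llx, AVX state enabled: %s\n",
	       xcr0, (xcr0 & 0x4) ? "yes" : "no");
	return 0;
}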

arch/x86/include/asm/special_insns.h

Lines changed: 6 additions & 0 deletions
@@ -234,6 +234,12 @@ static inline void clwb(volatile void *__p)
 
 #define nop() asm volatile ("nop")
 
+static inline void serialize(void)
+{
+	/* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+	asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_SPECIAL_INSNS_H */
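
The comment explains why the helper uses raw opcode bytes: only binutils >= 2.35 knows the SERIALIZE mnemonic, and the kernel must still build with older assemblers. With a new-enough toolchain the same helper could be spelled directly; the name below is illustrative, not something in the tree.

/* Hypothetical alternative (not in the tree): emit the mnemonic directly
 * instead of the raw opcode bytes, for binutils >= 2.35 only.
 */
static inline void serialize_mnemonic(void)
{
	asm volatile("serialize" ::: "memory");
}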

arch/x86/include/asm/sync_core.h

Lines changed: 22 additions & 12 deletions
@@ -5,6 +5,7 @@
 #include <linux/preempt.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
+#include <asm/special_insns.h>
 
 #ifdef CONFIG_X86_32
 static inline void iret_to_self(void)
@@ -46,22 +47,34 @@ static inline void iret_to_self(void)
  *
  * b) Text was modified on a different CPU, may subsequently be
  *    executed on this CPU, and you want to make sure the new version
- *    gets executed.  This generally means you're calling this in a IPI.
+ *    gets executed.  This generally means you're calling this in an IPI.
  *
  * If you're calling this for a different reason, you're probably doing
  * it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
  */
 static inline void sync_core(void)
 {
 	/*
-	 * There are quite a few ways to do this.  IRET-to-self is nice
-	 * because it works on every CPU, at any CPL (so it's compatible
-	 * with paravirtualization), and it never exits to a hypervisor.
-	 * The only down sides are that it's a bit slow (it seems to be
-	 * a bit more than 2x slower than the fastest options) and that
-	 * it unmasks NMIs.  The "push %cs" is needed because, in
-	 * paravirtual environments, __KERNEL_CS may not be a valid CS
-	 * value when we do IRET directly.
+	 * The SERIALIZE instruction is the most straightforward way to
+	 * do this, but it is not universally available.
+	 */
+	if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+		serialize();
+		return;
+	}
+
+	/*
+	 * For all other processors, there are quite a few ways to do this.
+	 * IRET-to-self is nice because it works on every CPU, at any CPL
+	 * (so it's compatible with paravirtualization), and it never exits
+	 * to a hypervisor.  The only downsides are that it's a bit slow
+	 * (it seems to be a bit more than 2x slower than the fastest
+	 * options) and that it unmasks NMIs.  The "push %cs" is needed,
+	 * because in paravirtual environments __KERNEL_CS may not be a
+	 * valid CS value when we do IRET directly.
 	 *
 	 * In case NMI unmasking or performance ever becomes a problem,
 	 * the next best option appears to be MOV-to-CR2 and an
@@ -71,9 +84,6 @@ static inline void sync_core(void)
 	 * CPUID is the conventional way, but it's nasty: it doesn't
 	 * exist on some 486-like CPUs, and it usually exits to a
 	 * hypervisor.
-	 *
-	 * Like all of Linux's memory ordering operations, this is a
-	 * compiler barrier as well.
 	 */
 	iret_to_self();
 }
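
A sketch (not from this diff) of the case (b) usage the comment describes: after cross-modifying kernel text, every CPU is forced through sync_core() via an IPI before it may execute the new bytes. The helper names here are illustrative; the upstream equivalent lives in the text-poking code.

#include <linux/smp.h>
#include <asm/sync_core.h>

/* Illustrative callback: runs on each CPU in IPI context. */
static void sync_core_on_cpu(void *info)
{
	sync_core();
}

/* After patching kernel text, make every online CPU serialize before it
 * may fetch the modified instructions.
 */
static void serialize_all_cpus(void)
{
	on_each_cpu(sync_core_on_cpu, NULL, 1);	/* wait == 1: block until done */
}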

arch/x86/kernel/cpu/centaur.c

Lines changed: 12 additions & 15 deletions
@@ -65,6 +65,9 @@ static void init_c3(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	}
+
+	if (c->x86 >= 7)
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 }
 
 enum {
@@ -90,18 +93,15 @@ enum {
 
 static void early_init_centaur(struct cpuinfo_x86 *c)
 {
-	switch (c->x86) {
 #ifdef CONFIG_X86_32
-	case 5:
-		/* Emulate MTRRs using Centaur's MCR. */
+	/* Emulate MTRRs using Centaur's MCR. */
+	if (c->x86 == 5)
 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
-		break;
 #endif
-	case 6:
-		if (c->x86_model >= 0xf)
-			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-		break;
-	}
+	if ((c->x86 == 6 && c->x86_model >= 0xf) ||
+	    (c->x86 >= 7))
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #endif
@@ -145,9 +145,8 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
-	switch (c->x86) {
 #ifdef CONFIG_X86_32
-	case 5:
+	if (c->x86 == 5) {
 		switch (c->x86_model) {
 		case 4:
 			name = "C6";
@@ -207,12 +206,10 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			c->x86_cache_size = (cc>>24)+(dd>>24);
 		}
 		sprintf(c->x86_model_id, "WinChip %s", name);
-		break;
+	}
 #endif
-	case 6:
+	if (c->x86 == 6 || c->x86 >= 7)
 		init_c3(c);
-		break;
-	}
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 #endif

arch/x86/kernel/cpu/scattered.c

Lines changed: 1 addition & 0 deletions
@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_MBA,		CPUID_EBX,  6, 0x80000008, 0 },
 	{ X86_FEATURE_SME,		CPUID_EAX,  0, 0x8000001f, 0 },
 	{ X86_FEATURE_SEV,		CPUID_EAX,  1, 0x8000001f, 0 },
+	{ X86_FEATURE_SME_COHERENT,	CPUID_EAX, 10, 0x8000001f, 0 },
 	{ 0, 0, 0, 0, 0 }
 };
 

arch/x86/kernel/umip.c

Lines changed: 27 additions & 13 deletions
@@ -45,11 +45,12 @@
  * value that, lies close to the top of the kernel memory. The limit for the GDT
  * and the IDT are set to zero.
  *
- * Given that SLDT and STR are not commonly used in programs that run on WineHQ
- * or DOSEMU2, they are not emulated.
- *
- * The instruction smsw is emulated to return the value that the register CR0
+ * The instruction SMSW is emulated to return the value that the register CR0
  * has at boot time as set in the head_32.
+ * SLDT and STR are emulated to return the values that the kernel programmatically
+ * assigns:
+ * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not.
+ * - STR returns (GDT_ENTRY_TSS * 8).
  *
  * Emulation is provided for both 32-bit and 64-bit processes.
  *
@@ -244,24 +245,41 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
 		*data_size += UMIP_GDT_IDT_LIMIT_SIZE;
 		memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
 
-	} else if (umip_inst == UMIP_INST_SMSW) {
-		unsigned long dummy_value = CR0_STATE;
+	} else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT ||
+		   umip_inst == UMIP_INST_STR) {
+		unsigned long dummy_value;
+
+		if (umip_inst == UMIP_INST_SMSW) {
+			dummy_value = CR0_STATE;
+		} else if (umip_inst == UMIP_INST_STR) {
+			dummy_value = GDT_ENTRY_TSS * 8;
+		} else if (umip_inst == UMIP_INST_SLDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+			down_read(&current->mm->context.ldt_usr_sem);
+			if (current->mm->context.ldt)
+				dummy_value = GDT_ENTRY_LDT * 8;
+			else
+				dummy_value = 0;
+			up_read(&current->mm->context.ldt_usr_sem);
+#else
+			dummy_value = 0;
+#endif
+		}
 
 		/*
-		 * Even though the CR0 register has 4 bytes, the number
+		 * For these 3 instructions, the number
 		 * of bytes to be copied in the result buffer is determined
 		 * by whether the operand is a register or a memory location.
 		 * If operand is a register, return as many bytes as the operand
 		 * size. If operand is memory, return only the two least
-		 * siginificant bytes of CR0.
+		 * siginificant bytes.
 		 */
 		if (X86_MODRM_MOD(insn->modrm.value) == 3)
 			*data_size = insn->opnd_bytes;
 		else
 			*data_size = 2;
 
 		memcpy(data, &dummy_value, *data_size);
-		/* STR and SLDT are not emulated */
 	} else {
 		return -EINVAL;
 	}
@@ -383,10 +401,6 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
 		     umip_insns[umip_inst]);
 
-	/* Do not emulate (spoof) SLDT or STR. */
-	if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
-		return false;
-
 	umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
 
 	if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
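
For illustration only (not part of this diff), the userspace pattern this change rescues: with CR4.UMIP set, SLDT and STR from ring 3 raise #GP, which used to crash Windows programs under Wine; the kernel now traps the fault and returns the spoofed selectors described in the header comment above.

#include <stdio.h>

int main(void)
{
	unsigned short ldt_sel, tr_sel;

	asm volatile("sldt %0" : "=m" (ldt_sel));	/* spoofed: 0 or GDT_ENTRY_LDT * 8 */
	asm volatile("str %0"  : "=m" (tr_sel));	/* spoofed: GDT_ENTRY_TSS * 8 */

	printf("LDTR selector: %#hx, TR selector: %#hx\n", ldt_sel, tr_sel);
	return 0;
}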

arch/x86/kvm/cpuid.c

Lines changed: 1 addition & 1 deletion
@@ -371,7 +371,7 @@ void kvm_set_cpu_caps(void)
 		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
 		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
 		F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
-		F(SERIALIZE)
+		F(SERIALIZE) | F(TSXLDTRK)
 	);
 
 	/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */

arch/x86/kvm/svm/sev.c

Lines changed: 2 additions & 1 deletion
@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
 	uint8_t *page_virtual;
 	unsigned long i;
 
-	if (npages == 0 || pages == NULL)
+	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
+	    pages == NULL)
 		return;
 
 	for (i = 0; i < npages; i++) {

arch/x86/mm/pat/set_memory.c

Lines changed: 1 addition & 1 deletion
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 	/*
 	 * Before changing the encryption attribute, we need to flush caches.
 	 */
-	cpa_flush(&cpa, 1);
+	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 
