Skip to content

Commit 452cddb

Browse files
Peter Zijlstra authored and Ingo Molnar committed
static_call: Add static_call_cond()
Extend the static_call infrastructure to optimize the following common pattern:

    if (func_ptr)
        func_ptr(args...)

For the trampoline (which is in effect a tail-call), we patch the JMP.d32 into a RET, which then directly consumes the trampoline call.

For the in-line sites we replace the CALL with a NOP5.

NOTE: this is 'obviously' limited to functions with a 'void' return type.

NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed to a full function.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135805.042977182@infradead.org
1 parent c43a43e commit 452cddb

3 files changed

Lines changed: 127 additions & 13 deletions

File tree

arch/x86/include/asm/static_call.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,21 @@
2020
* it does tail-call optimization on the call; since you cannot compute the
2121
* relative displacement across sections.
2222
*/
23-
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
23+
24+
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
2425
asm(".pushsection .static_call.text, \"ax\" \n" \
2526
".align 4 \n" \
2627
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
2728
STATIC_CALL_TRAMP_STR(name) ": \n" \
28-
" .byte 0xe9 # jmp.d32 \n" \
29-
" .long " #func " - (. + 4) \n" \
29+
insns " \n" \
3030
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
3131
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
3232
".popsection \n")
3333

34+
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
35+
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
36+
37+
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
38+
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
39+
3440
#endif /* _ASM_STATIC_CALL_H */

arch/x86/kernel/static_call.c

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,52 @@
44
#include <linux/bug.h>
55
#include <asm/text-patching.h>
66

7-
static void __static_call_transform(void *insn, u8 opcode, void *func)
7+
enum insn_type {
8+
CALL = 0, /* site call */
9+
NOP = 1, /* site cond-call */
10+
JMP = 2, /* tramp / site tail-call */
11+
RET = 3, /* tramp / site cond-tail-call */
12+
};
13+
14+
static void __static_call_transform(void *insn, enum insn_type type, void *func)
815
{
9-
const void *code = text_gen_insn(opcode, insn, func);
16+
int size = CALL_INSN_SIZE;
17+
const void *code;
1018

11-
if (WARN_ONCE(*(u8 *)insn != opcode,
12-
"unexpected static call insn opcode 0x%x at %pS\n",
13-
opcode, insn))
14-
return;
19+
switch (type) {
20+
case CALL:
21+
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
22+
break;
23+
24+
case NOP:
25+
code = ideal_nops[NOP_ATOMIC5];
26+
break;
27+
28+
case JMP:
29+
code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
30+
break;
31+
32+
case RET:
33+
code = text_gen_insn(RET_INSN_OPCODE, insn, func);
34+
size = RET_INSN_SIZE;
35+
break;
36+
}
1537

16-
if (memcmp(insn, code, CALL_INSN_SIZE) == 0)
38+
if (memcmp(insn, code, size) == 0)
1739
return;
1840

19-
text_poke_bp(insn, code, CALL_INSN_SIZE, NULL);
41+
text_poke_bp(insn, code, size, NULL);
2042
}
2143

2244
void arch_static_call_transform(void *site, void *tramp, void *func)
2345
{
2446
mutex_lock(&text_mutex);
2547

2648
if (tramp)
27-
__static_call_transform(tramp, JMP32_INSN_OPCODE, func);
49+
__static_call_transform(tramp, func ? JMP : RET, func);
2850

2951
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
30-
__static_call_transform(site, CALL_INSN_OPCODE, func);
52+
__static_call_transform(site, func ? CALL : NOP, func);
3153

3254
mutex_unlock(&text_mutex);
3355
}

include/linux/static_call.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
*
1717
* DECLARE_STATIC_CALL(name, func);
1818
* DEFINE_STATIC_CALL(name, func);
19+
* DEFINE_STATIC_CALL_NULL(name, typename);
1920
* static_call(name)(args...);
21+
* static_call_cond(name)(args...);
2022
* static_call_update(name, func);
2123
*
2224
* Usage example:
@@ -52,6 +54,43 @@
5254
* rather than calling through the trampoline. This requires objtool or a
5355
* compiler plugin to detect all the static_call() sites and annotate them
5456
* in the .static_call_sites section.
57+
*
58+
*
59+
* Notes on NULL function pointers:
60+
*
61+
* Static_call()s support NULL functions, with many of the caveats that
62+
* regular function pointers have.
63+
*
64+
* Clearly calling a NULL function pointer is 'BAD', so too for
65+
* static_call()s (although when HAVE_STATIC_CALL it might not be immediately
66+
* fatal). A NULL static_call can be the result of:
67+
*
68+
* DEFINE_STATIC_CALL_NULL(my_static_call, void (*)(int));
69+
*
70+
* which is equivalent to declaring a NULL function pointer with just a
71+
* typename:
72+
*
73+
* void (*my_func_ptr)(int arg1) = NULL;
74+
*
75+
* or using static_call_update() with a NULL function. In both cases the
76+
* HAVE_STATIC_CALL implementation will patch the trampoline with a RET
77+
* instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
78+
* architectures can patch the trampoline call to a NOP.
79+
*
80+
* In all cases, any argument evaluation is unconditional. Unlike a regular
81+
* conditional function pointer call:
82+
*
83+
* if (my_func_ptr)
84+
* my_func_ptr(arg1)
85+
*
86+
* where the argument evaluation also depends on the pointer value.
87+
*
88+
* When calling a static_call that can be NULL, use:
89+
*
90+
* static_call_cond(name)(arg1);
91+
*
92+
* which will include the required value tests to avoid NULL-pointer
93+
* dereferences.
5594
*/
5695

5796
#include <linux/types.h>
@@ -120,7 +159,16 @@ extern int static_call_text_reserved(void *start, void *end);
120159
}; \
121160
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
122161

162+
#define DEFINE_STATIC_CALL_NULL(name, _func) \
163+
DECLARE_STATIC_CALL(name, _func); \
164+
struct static_call_key STATIC_CALL_KEY(name) = { \
165+
.func = NULL, \
166+
.type = 1, \
167+
}; \
168+
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
169+
123170
#define static_call(name) __static_call(name)
171+
#define static_call_cond(name) (void)__static_call(name)
124172

125173
#define EXPORT_STATIC_CALL(name) \
126174
EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
@@ -143,7 +191,15 @@ struct static_call_key {
143191
}; \
144192
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
145193

194+
#define DEFINE_STATIC_CALL_NULL(name, _func) \
195+
DECLARE_STATIC_CALL(name, _func); \
196+
struct static_call_key STATIC_CALL_KEY(name) = { \
197+
.func = NULL, \
198+
}; \
199+
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
200+
146201
#define static_call(name) __static_call(name)
202+
#define static_call_cond(name) (void)__static_call(name)
147203

148204
static inline
149205
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -179,9 +235,39 @@ struct static_call_key {
179235
.func = _func, \
180236
}
181237

238+
#define DEFINE_STATIC_CALL_NULL(name, _func) \
239+
DECLARE_STATIC_CALL(name, _func); \
240+
struct static_call_key STATIC_CALL_KEY(name) = { \
241+
.func = NULL, \
242+
}
243+
182244
#define static_call(name) \
183245
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
184246

247+
static inline void __static_call_nop(void) { }
248+
249+
/*
250+
* This horrific hack takes care of two things:
251+
*
252+
* - it ensures the compiler will only load the function pointer ONCE,
253+
* which avoids a reload race.
254+
*
255+
* - it ensures the argument evaluation is unconditional, similar
256+
* to the HAVE_STATIC_CALL variant.
257+
*
258+
* Sadly current GCC/Clang (10 for both) do not optimize this properly
259+
* and will emit an indirect call for the NULL case :-(
260+
*/
261+
#define __static_call_cond(name) \
262+
({ \
263+
void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \
264+
if (!func) \
265+
func = &__static_call_nop; \
266+
(typeof(STATIC_CALL_TRAMP(name))*)func; \
267+
})
268+
269+
#define static_call_cond(name) (void)__static_call_cond(name)
270+
185271
static inline
186272
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
187273
{

0 commit comments

Comments
 (0)