 #include <linux/sched.h>
 #include <linux/sched/idle.h>
 #include <linux/hypervisor.h>
+#include <linux/sched/clock.h>
+#include <linux/nmi.h>
+#include <linux/sched/debug.h>
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -96,17 +99,134 @@ void __init call_function_init(void)
 	smpcfd_prepare_cpu(smp_processor_id());
 }
 
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+
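+/* Per-CPU state recording the CSD request currently being handled, if any. */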
+static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+static DEFINE_PER_CPU(void *, cur_csd_info);
+
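+/*
+ * Complain if a CSD-lock wait exceeds this many nanoseconds: the first
+ * report after five seconds, then another every five seconds thereafter.
+ */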
+#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+/* Record current CSD work for current CPU, NULL to erase. */
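+/*
+ * The func and info fields are published before cur_csd itself, so the
+ * acquire load of cur_csd in csd_lock_wait_toolong() always observes a
+ * matching func/info pair.
+ */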
+static void csd_lock_record(call_single_data_t *csd)
+{
+	if (!csd) {
+		smp_mb(); /* NULL cur_csd after unlock. */
+		__this_cpu_write(cur_csd, NULL);
+		return;
+	}
+	__this_cpu_write(cur_csd_func, csd->func);
+	__this_cpu_write(cur_csd_info, csd->info);
+	smp_wmb(); /* func and info before csd. */
+	__this_cpu_write(cur_csd, csd);
+	smp_mb(); /* Update cur_csd before function call. */
+		  /* Or before unlock, as the case may be. */
+}
+
+static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+{
+	unsigned int csd_type;
+
+	csd_type = CSD_TYPE(csd);
+	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
+		return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
+	return -1;
+}
+
+/*
+ * Complain if too much time spent waiting.  Note that only
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+{
+	int cpu = -1;
+	int cpux;
+	bool firsttime;
+	u64 ts2, ts_delta;
+	call_single_data_t *cpu_cur_csd;
+	unsigned int flags = READ_ONCE(csd->flags);
+
+	if (!(flags & CSD_FLAG_LOCK)) {
+		if (!unlikely(*bug_id))
+			return true;
+		cpu = csd_lock_wait_getcpu(csd);
+		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
+			 *bug_id, raw_smp_processor_id(), cpu);
+		return true;
+	}
+
+	ts2 = sched_clock();
+	ts_delta = ts2 - *ts1;
+	if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+		return false;
+
+	firsttime = !*bug_id;
+	if (firsttime)
+		*bug_id = atomic_inc_return(&csd_bug_count);
+	cpu = csd_lock_wait_getcpu(csd);
+	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
+		cpux = 0;
+	else
+		cpux = cpu;
+	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
+	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
+		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
+		 cpu, csd->func, csd->info);
+	if (cpu_cur_csd && csd != cpu_cur_csd) {
+		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
+			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
+			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
+	} else {
+		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
+			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
+	}
+	if (cpu >= 0) {
+		if (!trigger_single_cpu_backtrace(cpu))
+			dump_cpu_task(cpu);
+		if (!cpu_cur_csd) {
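+			/* Target CPU is handling no recorded CSD, so the IPI may have been lost: resend it. */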
+			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
+			arch_send_call_function_single_ipi(cpu);
+		}
+	}
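+	/* Also dump the stack of this (waiting) CPU. */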
+	dump_stack();
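+	/* Re-arm the timeout so the next report fires one period later. */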
+	*ts1 = ts2;
+
+	return false;
+}
+
 /*
  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
  *
  * For non-synchronous ipi calls the csd can still be in use by the
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
+{
+	int bug_id = 0;
+	u64 ts0, ts1;
+
+	ts1 = ts0 = sched_clock();
+	for (;;) {
+		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
+			break;
+		cpu_relax();
+	}
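+	/*
+	 * The loop exits on a control dependency from the CSD_FLAG_LOCK
+	 * load; this barrier upgrades that to acquire ordering, matching
+	 * smp_cond_load_acquire() in the !debug version below.
+	 */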
+	smp_acquire__after_ctrl_dep();
+}
+
+#else
+static void csd_lock_record(call_single_data_t *csd)
+{
+}
+
 static __always_inline void csd_lock_wait(call_single_data_t *csd)
 {
 	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
+#endif
 
 static __always_inline void csd_lock(call_single_data_t *csd)
 {
@@ -166,9 +286,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd)
 		/*
 		 * We can unlock early even for the synchronous on-stack case,
 		 * since we're doing this from the same CPU..
 		 */
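+		/* Record what we are about to run, for the CSD-lock diagnostics. */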
+		csd_lock_record(csd);
 		csd_unlock(csd);
 		local_irq_save(flags);
 		func(info);
+		csd_lock_record(NULL);
 		local_irq_restore(flags);
 		return 0;
 	}
@@ -268,8 +390,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 				entry = &csd_next->llist;
 			}
 
+			csd_lock_record(csd);
 			func(info);
 			csd_unlock(csd);
+			csd_lock_record(NULL);
 		} else {
 			prev = &csd->llist;
 		}
@@ -296,8 +420,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 			smp_call_func_t func = csd->func;
 			void *info = csd->info;
 
+			csd_lock_record(csd);
 			csd_unlock(csd);
 			func(info);
+			csd_lock_record(NULL);
 		} else if (type == CSD_TYPE_IRQ_WORK) {
 			irq_work_single(csd);
 		}
@@ -375,6 +501,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 
 	csd->func = func;
 	csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
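+	/* Record requester and target CPUs for the CSD-lock diagnostics. */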
+	csd->src = smp_processor_id();
+	csd->dst = cpu;
+#endif
 
 	err = generic_exec_single(cpu, csd);
 
@@ -540,6 +670,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+		csd->src = smp_processor_id();
+		csd->dst = cpu;
+#endif
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
 			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}