
Commit d0a37fd

Merge tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
 "A set of scheduler fixes:

  - Address a load balancer regression by making the load balancer use
    the same logic as the wakeup path to spread tasks in the LLC domain

  - Prefer the CPU on which a task ran last over the local CPU in the
    fast wakeup path for asymmetric CPU capacity systems, to align with
    the symmetric case. This ensures more locality and prevents massive
    migration overhead on those asymmetric systems

  - Fix a memory corruption bug in the scheduler debug code caused by
    handing a modified buffer pointer to kfree()"

* tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/debug: Fix memory corruption caused by multiple small reads of flags
  sched/fair: Prefer prev cpu in asymmetric wakeup path
  sched/fair: Ensure tasks spreading in LLC during LB
2 parents 259c2fb + 8d4d9c7 commit d0a37fd

2 files changed

Lines changed: 51 additions & 31 deletions


kernel/sched/debug.c

Lines changed: 6 additions & 6 deletions
@@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 	unsigned long flags = *(unsigned long *)table->data;
 	size_t data_size = 0;
 	size_t len = 0;
-	char *tmp;
+	char *tmp, *buf;
 	int idx;
 
 	if (write)
@@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 		return 0;
 	}
 
-	tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL);
-	if (!tmp)
+	buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
+	if (!buf)
 		return -ENOMEM;
 
 	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
 		char *name = sd_flag_debug[idx].name;
 
-		len += snprintf(tmp + len, strlen(name) + 2, "%s ", name);
+		len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
 	}
 
-	tmp += *ppos;
+	tmp = buf + *ppos;
 	len -= *ppos;
 
 	if (len > *lenp)
@@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
 	*lenp = len;
 	*ppos += len;
 
-	kfree(tmp);
+	kfree(buf);
 
 	return 0;
 }
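
The corruption fixed here comes from advancing the only pointer to the allocation (tmp += *ppos) to honour the read offset and then handing that advanced pointer to kfree(), which must receive exactly the address the allocator returned. Because the sysctl read handler can be called repeatedly with a growing *ppos, every partial read after the first freed an interior address. A minimal user-space sketch of the broken and fixed patterns; the buffer contents, sizes and function names are illustrative, not taken from the kernel:

#include <stdlib.h>
#include <string.h>

/* Broken pattern: the sole pointer to the allocation is advanced by the
 * caller's offset, so free() later gets an address inside the block. */
static void read_flags_broken(char *dst, size_t off, size_t n)
{
	char *tmp = calloc(64, 1);

	if (!tmp)
		return;
	strcpy(tmp, "SD_BALANCE_WAKE SD_SHARE_PKG_RESOURCES ");
	tmp += off;		/* original pointer is lost */
	memcpy(dst, tmp, n);
	free(tmp);		/* undefined behaviour whenever off != 0 */
}

/* Fixed pattern, mirroring the diff: keep the allocation pointer (buf)
 * and use a separate cursor (tmp) for the offset arithmetic. */
static void read_flags_fixed(char *dst, size_t off, size_t n)
{
	char *buf = calloc(64, 1);
	char *tmp;

	if (!buf)
		return;
	strcpy(buf, "SD_BALANCE_WAKE SD_SHARE_PKG_RESOURCES ");
	tmp = buf + off;	/* cursor honours the caller's offset */
	memcpy(dst, tmp, n);
	free(buf);		/* always the address calloc() returned */
}

int main(void)
{
	char out[16] = { 0 };

	read_flags_fixed(out, 4, 8);	/* copy 8 bytes starting at offset 4 */
	return 0;
}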

kernel/sched/fair.c

Lines changed: 45 additions & 25 deletions
@@ -6172,21 +6172,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int
 select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	unsigned long best_cap = 0;
+	unsigned long task_util, best_cap = 0;
 	int cpu, best_cpu = -1;
 	struct cpumask *cpus;
 
-	sync_entity_load_avg(&p->se);
-
 	cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
+	task_util = uclamp_task_util(p);
+
 	for_each_cpu_wrap(cpu, cpus, target) {
 		unsigned long cpu_cap = capacity_of(cpu);
 
 		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
 			continue;
-		if (task_fits_capacity(p, cpu_cap))
+		if (fits_capacity(task_util, cpu_cap))
 			return cpu;
 
 		if (cpu_cap > best_cap) {
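
This hunk computes the task's clamped utilization once, before the loop, and compares it against each candidate with fits_capacity() instead of calling task_fits_capacity(p, ...) per CPU. A rough user-space sketch of the clamp-then-compare idea follows; the helpers and the ~20% headroom factor are illustrative approximations of what uclamp_task_util() and fits_capacity() do at the time of this commit, not the kernel definitions:

#include <stdio.h>

/* Illustrative only: utilization and capacity on the usual 0..1024 scale. */
#define SCHED_CAPACITY_SCALE	1024UL

static unsigned long clamp_util(unsigned long util,
				unsigned long uclamp_min,
				unsigned long uclamp_max)
{
	/* uclamp_task_util()-style clamping of the task's utilization. */
	if (util < uclamp_min)
		return uclamp_min;
	if (util > uclamp_max)
		return uclamp_max;
	return util;
}

static int fits_with_margin(unsigned long util, unsigned long cap)
{
	/* fits_capacity()-style test: fit with roughly 20% headroom. */
	return util * 1280 < cap * 1024;
}

int main(void)
{
	unsigned long task_util = clamp_util(300, 0, SCHED_CAPACITY_SCALE);
	unsigned long little_cap = 360, big_cap = 1024;

	/* 300 needs ~375 of headroom-adjusted capacity, so it does not fit
	 * on the 360-capacity CPU but does fit on the big one. */
	printf("fits little: %d\n", fits_with_margin(task_util, little_cap));
	printf("fits big:    %d\n", fits_with_margin(task_util, big_cap));
	return 0;
}

Hoisting the clamped value out of the loop also lets select_idle_sibling() reuse the same task_util for its own fast-path checks, as the next hunks show.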
@@ -6198,44 +6198,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 	return best_cpu;
 }
 
+static inline bool asym_fits_capacity(int task_util, int cpu)
+{
+	if (static_branch_unlikely(&sched_asym_cpucapacity))
+		return fits_capacity(task_util, capacity_of(cpu));
+
+	return true;
+}
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
+	unsigned long task_util;
 	int i, recent_used_cpu;
 
 	/*
-	 * For asymmetric CPU capacity systems, our domain of interest is
-	 * sd_asym_cpucapacity rather than sd_llc.
+	 * On asymmetric system, update task utilization because we will check
+	 * that the task fits with cpu's capacity.
 	 */
 	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
-		sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
-		/*
-		 * On an asymmetric CPU capacity system where an exclusive
-		 * cpuset defines a symmetric island (i.e. one unique
-		 * capacity_orig value through the cpuset), the key will be set
-		 * but the CPUs within that cpuset will not have a domain with
-		 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
-		 * capacity path.
-		 */
-		if (!sd)
-			goto symmetric;
-
-		i = select_idle_capacity(p, sd, target);
-		return ((unsigned)i < nr_cpumask_bits) ? i : target;
+		sync_entity_load_avg(&p->se);
+		task_util = uclamp_task_util(p);
 	}
 
-symmetric:
-	if (available_idle_cpu(target) || sched_idle_cpu(target))
+	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+	    asym_fits_capacity(task_util, target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
 	if (prev != target && cpus_share_cache(prev, target) &&
-	    (available_idle_cpu(prev) || sched_idle_cpu(prev)))
+	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+	    asym_fits_capacity(task_util, prev))
 		return prev;
 
 	/*
@@ -6258,7 +6256,8 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
 	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
-	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
+	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+	    asym_fits_capacity(task_util, recent_used_cpu)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
 		 * candidate for the next wake:
@@ -6267,6 +6266,26 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 		return recent_used_cpu;
 	}
 
+	/*
+	 * For asymmetric CPU capacity systems, our domain of interest is
+	 * sd_asym_cpucapacity rather than sd_llc.
+	 */
+	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+		sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
+		/*
+		 * On an asymmetric CPU capacity system where an exclusive
+		 * cpuset defines a symmetric island (i.e. one unique
+		 * capacity_orig value through the cpuset), the key will be set
+		 * but the CPUs within that cpuset will not have a domain with
+		 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
+		 * capacity path.
+		 */
+		if (sd) {
+			i = select_idle_capacity(p, sd, target);
+			return ((unsigned)i < nr_cpumask_bits) ? i : target;
+		}
+	}
+
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	if (!sd)
 		return target;
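
Taken together, the select_idle_sibling() hunks move the sd_asym_cpucapacity scan below the fast-path checks, so an idle target, prev or recent_used_cpu that passes asym_fits_capacity() now wins before any capacity-aware search, instead of that search running first and possibly migrating the task away from its previous CPU. A condensed sketch of the resulting ordering; the function and callback parameters are illustrative, not the kernel signature:

/* Simplified wakeup fast path after this change: locality first, the
 * capacity-aware scan only as a fallback. Illustrative sketch only. */
static int pick_wakeup_cpu(int target, int prev, unsigned long task_util,
			   int (*is_idle)(int cpu),
			   int (*fits)(unsigned long util, int cpu))
{
	if (is_idle(target) && fits(task_util, target))
		return target;

	/* Prefer the CPU the task ran on last if it is idle and big enough,
	 * rather than migrating it just because a bigger CPU exists. */
	if (prev != target && is_idle(prev) && fits(task_util, prev))
		return prev;

	/* ... recent_used_cpu check, then the capacity-aware scan over
	 * sd_asym_cpucapacity, then the plain LLC idle search ... */
	return target;
}

On symmetric systems asym_fits_capacity() returns true unconditionally (the static branch is off), so the added conditions do not change behaviour there.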
@@ -9031,7 +9050,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	 * emptying busiest.
	 */
 	if (local->group_type == group_has_spare) {
-		if (busiest->group_type > group_fully_busy) {
+		if ((busiest->group_type > group_fully_busy) &&
+		    !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
 			/*
 			 * If busiest is overloaded, try to fill spare
 			 * capacity. This might end up creating spare capacity
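
The calculate_imbalance() change addresses the load-balancer regression from the merge description: inside an LLC domain (SD_SHARE_PKG_RESOURCES set), a group with spare capacity no longer takes the "fill spare capacity" path just because the busiest group is more than fully busy; it falls through to spreading tasks across idle CPUs, as the wakeup path would. A rough user-space sketch contrasting the two strategies; the struct, helpers and numbers are illustrative, though the spread case mirrors the kernel's use of the idle-CPU difference:

#include <stdio.h>

/* Two ways a group with spare capacity can pull work from a busier group. */
struct group_stats {
	unsigned int idle_cpus;
	unsigned long spare_capacity;	/* summed headroom, 0..1024 per CPU */
};

/* Spreading: migrate enough tasks to even out the number of idle CPUs.
 * This is what the wakeup path effectively does inside an LLC, and what
 * the fix makes the load balancer do there as well. */
static long imbalance_spread(const struct group_stats *local,
			     const struct group_stats *busiest)
{
	long diff = (long)local->idle_cpus - (long)busiest->idle_cpus;

	return diff > 0 ? diff / 2 : 0;	/* number of tasks to migrate */
}

/* Filling: migrate as much load as the local spare capacity can absorb.
 * Reasonable across LLCs or NUMA nodes, but inside an LLC it can pile
 * several small tasks onto one CPU while sibling CPUs stay idle. */
static long imbalance_fill(const struct group_stats *local)
{
	return (long)local->spare_capacity;	/* amount of load to migrate */
}

int main(void)
{
	struct group_stats local = { .idle_cpus = 6, .spare_capacity = 5800 };
	struct group_stats busiest = { .idle_cpus = 0, .spare_capacity = 0 };

	printf("spread: migrate ~%ld tasks\n", imbalance_spread(&local, &busiest));
	printf("fill:   migrate ~%ld load\n", imbalance_fill(&local));
	return 0;
}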
