Skip to content

Commit 3d7772e

Browse files
committed
tools/power turbostat: harden against cpu hotplug
turbostat tends to get confused when CPUs are added and removed while it is running. There are races, such as checking the current CPU and then reading a sysfs file that depends on that CPU number. Close the two issues that seem to come up the most. First, there is an infinite reset loop detector -- change that to allow more resets before giving up (the threshold is raised from 1 to 10). Second, one of those file reads, the open of the thread_siblings sysfs file in get_thread_siblings(), didn't really need to exit the program on failure; it now prints a warning and returns -1 instead. Signed-off-by: Len Brown <len.brown@intel.com>
1 parent 6ff7cb3 commit 3d7772e

1 file changed

Lines changed: 14 additions & 9 deletions

File tree

tools/power/x86/turbostat/turbostat.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1894,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
18941894
int i;
18951895

18961896
if (cpu_migrate(cpu)) {
1897-
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1897+
fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
18981898
return -1;
18991899
}
19001900

@@ -2764,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
27642764

27652765
sprintf(path,
27662766
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2767-
filep = fopen_or_die(path, "r");
2767+
filep = fopen(path, "r");
2768+
2769+
if (!filep) {
2770+
warnx("%s: open failed", path);
2771+
return -1;
2772+
}
27682773
do {
27692774
offset -= BITMASK_SIZE;
27702775
if (fscanf(filep, "%lx%c", &map, &character) != 2)
@@ -2877,7 +2882,7 @@ void re_initialize(void)
28772882
{
28782883
free_all_buffers();
28792884
setup_all_buffers();
2880-
printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2885+
fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
28812886
}
28822887

28832888
void set_max_cpu_num(void)
@@ -3331,7 +3336,7 @@ void turbostat_loop()
33313336
if (retval < -1) {
33323337
exit(retval);
33333338
} else if (retval == -1) {
3334-
if (restarted > 1) {
3339+
if (restarted > 10) {
33353340
exit(retval);
33363341
}
33373342
re_initialize();
@@ -3926,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
39263931
return 0;
39273932

39283933
if (cpu_migrate(cpu)) {
3929-
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3934+
fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
39303935
return -1;
39313936
}
39323937

@@ -3970,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
39703975
return 0;
39713976

39723977
if (cpu_migrate(cpu)) {
3973-
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3978+
fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
39743979
return -1;
39753980
}
39763981

@@ -4058,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
40584063
return 0;
40594064

40604065
if (cpu_migrate(cpu)) {
4061-
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4066+
fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
40624067
return -1;
40634068
}
40644069

@@ -4439,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
44394444
return 0;
44404445

44414446
if (cpu_migrate(cpu)) {
4442-
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4447+
fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
44434448
return -1;
44444449
}
44454450

@@ -4511,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
45114516

45124517
cpu = t->cpu_id;
45134518
if (cpu_migrate(cpu)) {
4514-
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4519+
fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
45154520
return -1;
45164521
}
45174522

0 commit comments

Comments
 (0)