Skip to content

Commit 3bff611

Browse files
committed
Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar: "x86 Intel updates: - Add Jasper Lake support - Add support for TopDown metrics on Ice Lake - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support - Add a PCI sub driver to support uncore PMUs where the PCI resources have been claimed already - extending the range of supported systems. x86 AMD updates: - Restore 'perf stat -a' behaviour to program the uncore PMU to count all CPU threads. - Fix setting the proper count when sampling Large Increment per Cycle events / 'paired' events. - Fix IBS Fetch sampling on F17h and some other IBS fine tuning, greatly reducing the number of interrupts when large sample periods are specified. - Extends Family 17h RAPL support to also work on compatible F19h machines. Core code updates: - Fix race in perf_mmap_close() - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be closed if the leader is removed. - Smaller fixes and updates" * tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits) perf/core: Fix race in the perf_mmap_close() function perf/x86: Fix n_metric for cancelled txn perf/x86: Fix n_pair for cancelled txn x86/events/amd/iommu: Fix sizeof mismatch perf/x86/intel: Check perf metrics feature for each CPU perf/x86/intel: Fix Ice Lake event constraint table perf/x86/intel/uncore: Fix the scale of the IMC free-running events perf/x86/intel/uncore: Fix for iio mapping on Skylake Server perf/x86/msr: Add Jasper Lake support perf/x86/intel: Add Jasper Lake support perf/x86/intel/uncore: Reduce the number of CBOX counters perf/x86/intel/uncore: Update Ice Lake uncore units perf/x86/intel/uncore: Split the Ice Lake and Tiger Lake MSR uncore support perf/x86/intel/uncore: Support PCIe3 unit on Snow Ridge perf/x86/intel/uncore: Generic support for the PCI sub driver perf/x86/intel/uncore: Factor out uncore_pci_pmu_unregister() perf/x86/intel/uncore: Factor out uncore_pci_pmu_register() perf/x86/intel/uncore: Factor out uncore_pci_find_dev_pmu() perf/x86/intel/uncore: Factor out uncore_pci_get_dev_die_info() perf/amd/uncore: Inform the user how many counters each uncore PMU has ...
2 parents dd502a8 + f91072e commit 3bff611

18 files changed

Lines changed: 1182 additions & 300 deletions

File tree

arch/x86/events/amd/ibs.c

Lines changed: 68 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ struct perf_ibs {
8989
u64 max_period;
9090
unsigned long offset_mask[1];
9191
int offset_max;
92+
unsigned int fetch_count_reset_broken : 1;
9293
struct cpu_perf_ibs __percpu *pcpu;
9394

9495
struct attribute **format_attrs;
@@ -334,11 +335,18 @@ static u64 get_ibs_op_count(u64 config)
334335
{
335336
u64 count = 0;
336337

337-
if (config & IBS_OP_VAL)
338-
count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
339-
340-
if (ibs_caps & IBS_CAPS_RDWROPCNT)
341-
count += (config & IBS_OP_CUR_CNT) >> 32;
338+
/*
339+
* If the internal 27-bit counter rolled over, the count is MaxCnt
340+
* and the lower 7 bits of CurCnt are randomized.
341+
* Otherwise CurCnt has the full 27-bit current counter value.
342+
*/
343+
if (config & IBS_OP_VAL) {
344+
count = (config & IBS_OP_MAX_CNT) << 4;
345+
if (ibs_caps & IBS_CAPS_OPCNTEXT)
346+
count += config & IBS_OP_MAX_CNT_EXT_MASK;
347+
} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
348+
count = (config & IBS_OP_CUR_CNT) >> 32;
349+
}
342350

343351
return count;
344352
}
@@ -363,7 +371,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
363371
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
364372
struct hw_perf_event *hwc, u64 config)
365373
{
366-
wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
374+
u64 tmp = hwc->config | config;
375+
376+
if (perf_ibs->fetch_count_reset_broken)
377+
wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
378+
379+
wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
367380
}
368381

369382
/*
@@ -394,7 +407,7 @@ static void perf_ibs_start(struct perf_event *event, int flags)
394407
struct hw_perf_event *hwc = &event->hw;
395408
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
396409
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
397-
u64 period;
410+
u64 period, config = 0;
398411

399412
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
400413
return;
@@ -403,13 +416,19 @@ static void perf_ibs_start(struct perf_event *event, int flags)
403416
hwc->state = 0;
404417

405418
perf_ibs_set_period(perf_ibs, hwc, &period);
419+
if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
420+
config |= period & IBS_OP_MAX_CNT_EXT_MASK;
421+
period &= ~IBS_OP_MAX_CNT_EXT_MASK;
422+
}
423+
config |= period >> 4;
424+
406425
/*
407426
* Set STARTED before enabling the hardware, such that a subsequent NMI
408427
* must observe it.
409428
*/
410429
set_bit(IBS_STARTED, pcpu->state);
411430
clear_bit(IBS_STOPPING, pcpu->state);
412-
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
431+
perf_ibs_enable_event(perf_ibs, hwc, config);
413432

414433
perf_event_update_userpage(event);
415434
}
@@ -577,7 +596,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
577596
struct perf_ibs_data ibs_data;
578597
int offset, size, check_rip, offset_max, throttle = 0;
579598
unsigned int msr;
580-
u64 *buf, *config, period;
599+
u64 *buf, *config, period, new_config = 0;
581600

582601
if (!test_bit(IBS_STARTED, pcpu->state)) {
583602
fail:
@@ -626,18 +645,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
626645
perf_ibs->offset_max,
627646
offset + 1);
628647
} while (offset < offset_max);
648+
/*
649+
* Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
650+
* depending on their availability.
651+
* Can't add to offset_max as they are staggered
652+
*/
629653
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
630-
/*
631-
* Read IbsBrTarget and IbsOpData4 separately
632-
* depending on their availability.
633-
* Can't add to offset_max as they are staggered
634-
*/
635-
if (ibs_caps & IBS_CAPS_BRNTRGT) {
636-
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
637-
size++;
654+
if (perf_ibs == &perf_ibs_op) {
655+
if (ibs_caps & IBS_CAPS_BRNTRGT) {
656+
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
657+
size++;
658+
}
659+
if (ibs_caps & IBS_CAPS_OPDATA4) {
660+
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
661+
size++;
662+
}
638663
}
639-
if (ibs_caps & IBS_CAPS_OPDATA4) {
640-
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
664+
if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
665+
rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
641666
size++;
642667
}
643668
}
@@ -666,13 +691,17 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
666691
if (throttle) {
667692
perf_ibs_stop(event, 0);
668693
} else {
669-
period >>= 4;
670-
671-
if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
672-
(*config & IBS_OP_CNT_CTL))
673-
period |= *config & IBS_OP_CUR_CNT_RAND;
694+
if (perf_ibs == &perf_ibs_op) {
695+
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
696+
new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
697+
period &= ~IBS_OP_MAX_CNT_EXT_MASK;
698+
}
699+
if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
700+
new_config |= *config & IBS_OP_CUR_CNT_RAND;
701+
}
702+
new_config |= period >> 4;
674703

675-
perf_ibs_enable_event(perf_ibs, hwc, period);
704+
perf_ibs_enable_event(perf_ibs, hwc, new_config);
676705
}
677706

678707
perf_event_update_userpage(event);
@@ -733,12 +762,26 @@ static __init void perf_event_ibs_init(void)
733762
{
734763
struct attribute **attr = ibs_op_format_attrs;
735764

765+
/*
766+
* Some chips fail to reset the fetch count when it is written; instead
767+
* they need a 0-1 transition of IbsFetchEn.
768+
*/
769+
if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
770+
perf_ibs_fetch.fetch_count_reset_broken = 1;
771+
736772
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
737773

738774
if (ibs_caps & IBS_CAPS_OPCNT) {
739775
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
740776
*attr++ = &format_attr_cnt_ctl.attr;
741777
}
778+
779+
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
780+
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
781+
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
782+
perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
783+
}
784+
742785
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
743786

744787
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");

arch/x86/events/amd/iommu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ static __init int _init_events_attrs(void)
379379
while (amd_iommu_v2_event_descs[i].attr.attr.name)
380380
i++;
381381

382-
attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL);
382+
attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
383383
if (!attrs)
384384
return -ENOMEM;
385385

0 commit comments

Comments (0)