Skip to content

Commit 82f5e5b

Browse files
committed
Merge tag 'drm-xe-fixes-2026-04-02' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
uAPI Fix: - Accept canonical GPU addresses in xe_vm_madvise_ioctl (Arvind) Driver Fixes: - Disallow writes to read-only VMAs (Jonathan) - PXP fixes (Daniele) - Disable garbage collector work item on SVM close (Brost) - Avoid memory allocations in xe_device_declare_wedged (Brost) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patch.msgid.link/ac5mDHs-McR5cJSV@intel.com
2 parents 9b454a3 + 56b7432 commit 82f5e5b

5 files changed

Lines changed: 48 additions & 26 deletions

File tree

drivers/gpu/drm/xe/xe_device.c

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,14 @@ static void detect_preproduction_hw(struct xe_device *xe)
837837
}
838838
}
839839

840+
static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
841+
{
842+
struct xe_device *xe = arg;
843+
844+
if (atomic_read(&xe->wedged.flag))
845+
xe_pm_runtime_put(xe);
846+
}
847+
840848
int xe_device_probe(struct xe_device *xe)
841849
{
842850
struct xe_tile *tile;
@@ -1013,6 +1021,10 @@ int xe_device_probe(struct xe_device *xe)
10131021

10141022
detect_preproduction_hw(xe);
10151023

1024+
err = drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe);
1025+
if (err)
1026+
goto err_unregister_display;
1027+
10161028
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
10171029

10181030
err_unregister_display:
@@ -1216,13 +1228,6 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
12161228
return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
12171229
}
12181230

1219-
static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
1220-
{
1221-
struct xe_device *xe = arg;
1222-
1223-
xe_pm_runtime_put(xe);
1224-
}
1225-
12261231
/**
12271232
* DOC: Xe Device Wedging
12281233
*
@@ -1300,15 +1305,9 @@ void xe_device_declare_wedged(struct xe_device *xe)
13001305
return;
13011306
}
13021307

1303-
xe_pm_runtime_get_noresume(xe);
1304-
1305-
if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
1306-
drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
1307-
return;
1308-
}
1309-
13101308
if (!atomic_xchg(&xe->wedged.flag, 1)) {
13111309
xe->needs_flr_on_fini = true;
1310+
xe_pm_runtime_get_noresume(xe);
13121311
drm_err(&xe->drm,
13131312
"CRITICAL: Xe has declared device %s as wedged.\n"
13141313
"IOCTLs and executions are blocked. Only a rebind may clear the failure\n"

drivers/gpu/drm/xe/xe_pagefault.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,12 @@ static int xe_pagefault_service(struct xe_pagefault *pf)
187187
goto unlock_vm;
188188
}
189189

190+
if (xe_vma_read_only(vma) &&
191+
pf->consumer.access_type != XE_PAGEFAULT_ACCESS_TYPE_READ) {
192+
err = -EPERM;
193+
goto unlock_vm;
194+
}
195+
190196
atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type);
191197

192198
if (xe_vma_is_cpu_addr_mirror(vma))

drivers/gpu/drm/xe/xe_pxp.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,18 @@ int xe_pxp_init(struct xe_device *xe)
380380
return 0;
381381
}
382382

383+
/*
384+
* On PTL, older GSC FWs have a bug that can cause them to crash during
385+
* PXP invalidation events, which leads to a complete loss of power
386+
* management on the media GT. Therefore, we can't use PXP on FWs that
387+
* have this bug, which was fixed in PTL GSC build 1396.
388+
*/
389+
if (xe->info.platform == XE_PANTHERLAKE &&
390+
gt->uc.gsc.fw.versions.found[XE_UC_FW_VER_RELEASE].build < 1396) {
391+
drm_info(&xe->drm, "PXP requires PTL GSC build 1396 or newer\n");
392+
return 0;
393+
}
394+
383395
pxp = drmm_kzalloc(&xe->drm, sizeof(struct xe_pxp), GFP_KERNEL);
384396
if (!pxp) {
385397
err = -ENOMEM;
@@ -512,7 +524,7 @@ static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
512524
static int pxp_start(struct xe_pxp *pxp, u8 type)
513525
{
514526
int ret = 0;
515-
bool restart = false;
527+
bool restart;
516528

517529
if (!xe_pxp_is_enabled(pxp))
518530
return -ENODEV;
@@ -541,6 +553,8 @@ static int pxp_start(struct xe_pxp *pxp, u8 type)
541553
msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS)))
542554
return -ETIMEDOUT;
543555

556+
restart = false;
557+
544558
mutex_lock(&pxp->mutex);
545559

546560
/* If PXP is not already active, turn it on */
@@ -583,6 +597,7 @@ static int pxp_start(struct xe_pxp *pxp, u8 type)
583597
drm_err(&pxp->xe->drm, "PXP termination failed before start\n");
584598
mutex_lock(&pxp->mutex);
585599
pxp->status = XE_PXP_ERROR;
600+
complete_all(&pxp->termination);
586601

587602
goto out_unlock;
588603
}
@@ -870,11 +885,6 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp)
870885
pxp->key_instance++;
871886
needs_queue_inval = true;
872887
break;
873-
default:
874-
drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u",
875-
pxp->status);
876-
ret = -EIO;
877-
goto out;
878888
}
879889

880890
/*
@@ -899,7 +909,6 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp)
899909

900910
pxp->last_suspend_key_instance = pxp->key_instance;
901911

902-
out:
903912
return ret;
904913
}
905914

drivers/gpu/drm/xe/xe_svm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@ int xe_svm_init(struct xe_vm *vm)
903903
void xe_svm_close(struct xe_vm *vm)
904904
{
905905
xe_assert(vm->xe, xe_vm_is_closed(vm));
906-
flush_work(&vm->svm.garbage_collector.work);
906+
disable_work_sync(&vm->svm.garbage_collector.work);
907907
xe_svm_put_pagemaps(vm);
908908
drm_pagemap_release_owner(&vm->svm.peer);
909909
}

drivers/gpu/drm/xe/xe_vm_madvise.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -408,8 +408,15 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
408408
struct xe_device *xe = to_xe_device(dev);
409409
struct xe_file *xef = to_xe_file(file);
410410
struct drm_xe_madvise *args = data;
411-
struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
412-
.range = args->range, };
411+
struct xe_vmas_in_madvise_range madvise_range = {
412+
/*
413+
* Userspace may pass canonical (sign-extended) addresses.
414+
* Strip the sign extension to get the internal non-canonical
415+
* form used by the GPUVM, matching xe_vm_bind_ioctl() behavior.
416+
*/
417+
.addr = xe_device_uncanonicalize_addr(xe, args->start),
418+
.range = args->range,
419+
};
413420
struct xe_madvise_details details;
414421
struct xe_vm *vm;
415422
struct drm_exec exec;
@@ -439,7 +446,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
439446
if (err)
440447
goto unlock_vm;
441448

442-
err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
449+
err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range);
443450
if (err)
444451
goto madv_fini;
445452

@@ -482,7 +489,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
482489
madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
483490
&details);
484491

485-
err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
492+
err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
493+
madvise_range.addr + args->range);
486494

487495
if (madvise_range.has_svm_userptr_vmas)
488496
xe_svm_notifier_unlock(vm);

0 commit comments

Comments
 (0)