Skip to content

Commit 60d9212

Browse files
committed
Merge tag 'drm-fixes-2026-04-03' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie: "Hopefully no Easter eggs in this bunch of fixes. Usual stuff across the amd/intel with some misc bits. Thanks to Thorsten and Alex for making sure a regression fix that was hanging around in process land finally made it in, that is probably the biggest change in here. core: - revert unplug/framebuffer fix as it caused problems - compat ioctl speculation fix bridge: - refcounting fix sysfb: - error handling fix amdgpu: - fix renoir audio regression - UserQ fixes - PASID handling fix - S4 fix for smu11 chips - Misc small fixes amdkfd: - Non-4K page fixes i915: - Fix for #12045: Huawei Matebook E (DRR-WXX): Persistent Black Screen on Boot with i915 and Gen11: Modesetting and Backlight Control Malfunction - Fix for #15826: i915: Raptor Lake-P [UHD Graphics] display flicker/corruption on eDP panel - Use crtc_state->enhanced_framing properly on ivb/hsw CPU eDP xe: - uapi: Accept canonical GPU addresses in xe_vm_madvise_ioctl - Disallow writes to read-only VMAs - PXP fixes - Disable garbage collector work item on SVM close - Avoid memory allocations in xe_device_declare_wedged qaic: - hang fix ast: - initialisation fix" * tag 'drm-fixes-2026-04-03' of https://gitlab.freedesktop.org/drm/kernel: (28 commits) drm/amd/display: Wire up dcn10_dio_construct() for all pre-DCN401 generations drm/ioc32: stop speculation on the drm_compat_ioctl path drm/sysfb: Fix efidrm error handling and memory type mismatch drm/i915/dp: Use crtc_state->enhanced_framing properly on ivb/hsw CPU eDP drm/i915/cdclk: Do the full CDCLK dance for min_voltage_level changes drm/amdkfd: Fix queue preemption/eviction failures by aligning control stack size to GPU page size drm/amdgpu: Fix wait after reset sequence in S4 drm/amd/display: Fix NULL pointer dereference in dcn401_init_hw() drm/amdgpu: Change AMDGPU_VA_RESERVED_TRAP_SIZE to 64KB drm/amdgpu/userq: fix memory leak in MQD creation error paths drm/amd: Fix MQD and control stack alignment for non-4K drm/amdkfd: Align 
expected_queue_size to PAGE_SIZE drm/amdgpu: fix the idr allocation flags drm/amdgpu: validate doorbell_offset in user queue creation drm/amdgpu/pm: drop SMU driver if version not matched messages drm/xe: Avoid memory allocations in xe_device_declare_wedged() drm/xe: Disable garbage collector work item on SVM close drm/xe/pxp: Don't allow PXP on older PTL GSC FWs drm/xe/pxp: Clear restart flag in pxp_start after jumping back drm/xe/pxp: Remove incorrect handling of impossible state during suspend ...
2 parents d8a9a4b + 75f53c4 commit 60d9212

48 files changed

Lines changed: 1013 additions & 108 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

drivers/accel/qaic/qaic_control.c

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,7 @@ static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len
914914
*/
915915
return -ENODEV;
916916

917-
if (status) {
917+
if (usr && status) {
918918
/*
919919
* Releasing resources failed on the device side, which puts
920920
* us in a bind since they may still be in use, so enable the
@@ -1109,6 +1109,9 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
11091109
mutex_lock(&qdev->cntl_mutex);
11101110
if (!list_empty(&elem.list))
11111111
list_del(&elem.list);
1112+
/* resp_worker() processed the response but the wait was interrupted */
1113+
else if (ret == -ERESTARTSYS)
1114+
ret = 0;
11121115
if (!ret && !elem.buf)
11131116
ret = -ETIMEDOUT;
11141117
else if (ret > 0 && !elem.buf)
@@ -1419,9 +1422,49 @@ static void resp_worker(struct work_struct *work)
14191422
}
14201423
mutex_unlock(&qdev->cntl_mutex);
14211424

1422-
if (!found)
1425+
if (!found) {
1426+
/*
1427+
* The user might have gone away at this point without waiting
1428+
* for QAIC_TRANS_DEACTIVATE_FROM_DEV transaction coming from
1429+
* the device. If this is not handled correctly, the host will
1430+
* not know that the DBC[n] has been freed on the device.
1431+
* Due to this failure in synchronization between the device and
1432+
* the host, if another user requests to activate a network, and
1433+
* the device assigns DBC[n] again, save_dbc_buf() will hang,
1434+
* waiting for dbc[n]->in_use to be set to false, which will not
1435+
* happen unless the qaic_dev_reset_clean_local_state() gets
1436+
* called by resetting the device (or re-inserting the module).
1437+
*
1438+
* As a solution, we look for QAIC_TRANS_DEACTIVATE_FROM_DEV
1439+
* transactions in the message before disposing of it, then
1440+
* handle releasing the DBC resources.
1441+
*
1442+
* Since the user has gone away, if the device could not
1443+
* deactivate the network (status != 0), there is no way to
1444+
* enable and reassign the DBC to the user. We can put trust in
1445+
* the device that it will release all the active DBCs in
1446+
* response to the QAIC_TRANS_TERMINATE_TO_DEV transaction,
1447+
* otherwise, the user can issue an soc_reset to the device.
1448+
*/
1449+
u32 msg_count = le32_to_cpu(msg->hdr.count);
1450+
u32 msg_len = le32_to_cpu(msg->hdr.len);
1451+
u32 len = 0;
1452+
int j;
1453+
1454+
for (j = 0; j < msg_count && len < msg_len; ++j) {
1455+
struct wire_trans_hdr *trans_hdr;
1456+
1457+
trans_hdr = (struct wire_trans_hdr *)(msg->data + len);
1458+
if (le32_to_cpu(trans_hdr->type) == QAIC_TRANS_DEACTIVATE_FROM_DEV) {
1459+
if (decode_deactivate(qdev, trans_hdr, &len, NULL))
1460+
len += le32_to_cpu(trans_hdr->len);
1461+
} else {
1462+
len += le32_to_cpu(trans_hdr->len);
1463+
}
1464+
}
14231465
/* request must have timed out, drop packet */
14241466
kfree(msg);
1467+
}
14251468

14261469
kfree(resp);
14271470
}

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2703,8 +2703,12 @@ static int amdgpu_pmops_freeze(struct device *dev)
27032703
if (r)
27042704
return r;
27052705

2706-
if (amdgpu_acpi_should_gpu_reset(adev))
2707-
return amdgpu_asic_reset(adev);
2706+
if (amdgpu_acpi_should_gpu_reset(adev)) {
2707+
amdgpu_device_lock_reset_domain(adev->reset_domain);
2708+
r = amdgpu_asic_reset(adev);
2709+
amdgpu_device_unlock_reset_domain(adev->reset_domain);
2710+
return r;
2711+
}
27082712
return 0;
27092713
}
27102714

drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,50 @@ void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
403403
drm_dev_exit(idx);
404404
}
405405

406+
/**
407+
* amdgpu_gart_map_gfx9_mqd - map mqd and ctrl_stack dma_addresses into GART entries
408+
*
409+
* @adev: amdgpu_device pointer
410+
* @offset: offset into the GPU's gart aperture
411+
* @pages: number of pages to bind
412+
* @dma_addr: DMA addresses of pages
413+
* @flags: page table entry flags
414+
*
415+
* Map the MQD and control stack addresses into GART entries with the correct
416+
* memory types on gfxv9. The MQD occupies the first 4KB and is followed by
417+
* the control stack. The MQD uses UC (uncached) memory, while the control stack
418+
* uses NC (non-coherent) memory.
419+
*/
420+
void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
421+
int pages, dma_addr_t *dma_addr, uint64_t flags)
422+
{
423+
uint64_t page_base;
424+
unsigned int i, j, t;
425+
int idx;
426+
uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
427+
void *dst;
428+
429+
if (!adev->gart.ptr)
430+
return;
431+
432+
if (!drm_dev_enter(adev_to_drm(adev), &idx))
433+
return;
434+
435+
t = offset / AMDGPU_GPU_PAGE_SIZE;
436+
dst = adev->gart.ptr;
437+
for (i = 0; i < pages; i++) {
438+
page_base = dma_addr[i];
439+
for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
440+
if ((i == 0) && (j == 0))
441+
amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
442+
else
443+
amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, ctrl_flags);
444+
page_base += AMDGPU_GPU_PAGE_SIZE;
445+
}
446+
}
447+
drm_dev_exit(idx);
448+
}
449+
406450
/**
407451
* amdgpu_gart_bind - bind pages into the gart page table
408452
*

drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
6262
void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
6363
int pages, dma_addr_t *dma_addr, uint64_t flags,
6464
void *dst);
65+
void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
66+
int pages, dma_addr_t *dma_addr, uint64_t flags);
6567
void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
6668
int pages, dma_addr_t *dma_addr, uint64_t flags);
6769
void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,

drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,11 @@ int amdgpu_pasid_alloc(unsigned int bits)
6868
return -EINVAL;
6969

7070
spin_lock(&amdgpu_pasid_idr_lock);
71+
/* TODO: Need to replace the idr with an xarray, and then
72+
* handle the internal locking with ATOMIC safe paths.
73+
*/
7174
pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
72-
1U << bits, GFP_KERNEL);
75+
1U << bits, GFP_ATOMIC);
7376
spin_unlock(&amdgpu_pasid_idr_lock);
7477

7578
if (pasid >= 0)

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -853,25 +853,15 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
853853
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
854854
uint64_t page_idx, pages_per_xcc;
855855
int i;
856-
uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
857856

858857
pages_per_xcc = total_pages;
859858
do_div(pages_per_xcc, num_xcc);
860859

861860
for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
862-
/* MQD page: use default flags */
863-
amdgpu_gart_bind(adev,
861+
amdgpu_gart_map_gfx9_mqd(adev,
864862
gtt->offset + (page_idx << PAGE_SHIFT),
865-
1, &gtt->ttm.dma_address[page_idx], flags);
866-
/*
867-
* Ctrl pages - modify the memory type to NC (ctrl_flags) from
868-
* the second page of the BO onward.
869-
*/
870-
amdgpu_gart_bind(adev,
871-
gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
872-
pages_per_xcc - 1,
873-
&gtt->ttm.dma_address[page_idx + 1],
874-
ctrl_flags);
863+
pages_per_xcc, &gtt->ttm.dma_address[page_idx],
864+
flags);
875865
}
876866
}
877867

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
600600
goto unpin_bo;
601601
}
602602

603+
/* Validate doorbell_offset is within the doorbell BO */
604+
if ((u64)db_info->doorbell_offset * db_size + db_size >
605+
amdgpu_bo_size(db_obj->obj)) {
606+
r = -EINVAL;
607+
goto unpin_bo;
608+
}
609+
603610
index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
604611
db_info->doorbell_offset, db_size);
605612
drm_dbg_driver(adev_to_drm(uq_mgr->adev),

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ struct amdgpu_bo_vm;
173173
#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
174174
#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
175175
- AMDGPU_VA_RESERVED_SEQ64_SIZE)
176-
#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
176+
#define AMDGPU_VA_RESERVED_TRAP_SIZE (1ULL << 16)
177177
#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
178178
- AMDGPU_VA_RESERVED_TRAP_SIZE)
179179
#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)

drivers/gpu/drm/amd/amdgpu/mes_userqueue.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,8 +324,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
324324

325325
r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va,
326326
2048);
327-
if (r)
327+
if (r) {
328+
kfree(compute_mqd);
328329
goto free_mqd;
330+
}
329331

330332
userq_props->eop_gpu_addr = compute_mqd->eop_va;
331333
userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
@@ -365,12 +367,16 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
365367

366368
r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va,
367369
shadow_info.shadow_size);
368-
if (r)
370+
if (r) {
371+
kfree(mqd_gfx_v11);
369372
goto free_mqd;
373+
}
370374
r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va,
371375
shadow_info.csa_size);
372-
if (r)
376+
if (r) {
377+
kfree(mqd_gfx_v11);
373378
goto free_mqd;
379+
}
374380

375381
kfree(mqd_gfx_v11);
376382
} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
@@ -390,8 +396,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
390396
}
391397
r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va,
392398
32);
393-
if (r)
399+
if (r) {
400+
kfree(mqd_sdma_v11);
394401
goto free_mqd;
402+
}
395403

396404
userq_props->csa_addr = mqd_sdma_v11->csa_va;
397405
kfree(mqd_sdma_v11);

drivers/gpu/drm/amd/amdgpu/psp_v11_0.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,8 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
170170
int retry_loop;
171171

172172
/* For a reset done at the end of S3 or S4, only wait for TOS to be unloaded */
173-
if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev))
173+
if ((adev->in_s4 || adev->in_s3) && !(adev->flags & AMD_IS_APU) &&
174+
amdgpu_in_reset(adev))
174175
return psp_v11_wait_for_tos_unload(psp);
175176

176177
for (retry_loop = 0; retry_loop < 20; retry_loop++) {

0 commit comments

Comments
 (0)