Skip to content

Commit bff164d

Browse files
Pull up following revision(s) (requested by skrll in ticket #40):
sys/arch/arm/include/cpufunc.h: revision 1.92 sys/arch/arm/arm32/bus_dma.c: revision 1.150 arm: Adjust barriers issued in bus_dmamap_sync for the coherent case. PR/59654 This change adjusts the memory barriers issued by bus_dmamap_sync for the coherent case. In the non-coherent case the CPU cache operations are expected to provide any, and all required barriers. These barriers are emitted after bouncing for PREWRITE and before bouncing for POSTREAD. A new indrection is introduced to deal with the differencs in barrier (dsb) options between Arm V8 and V7, the lack of options in Arm V6, and the need to use cpu_drain_writebuf for all other earlier versions. The Arm V8 Net change is: op old new --------------------- ---------------------- ------------ PREREAD none dsb(osh) PREWRITE cpu_drain_writebuf dsb(oshst) PREREAD|PREWRITE cpu_drain_writebuf dsb(osh) POSTREAD cpu_drain_writebuf dsb(oshld) POSTWRITE none none POSTREAD|POSTWRITE cpu_drain_writebuf dsb(oshld) where cpu_drain_writebuf is a dsb(sy) or CPU equivalent.
1 parent 19c8372 commit bff164d

File tree

2 files changed

+63
-10
lines changed

2 files changed

+63
-10
lines changed

sys/arch/arm/arm32/bus_dma.c

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* $NetBSD: bus_dma.c,v 1.149 2025/03/08 21:00:45 jmcneill Exp $ */
1+
/* $NetBSD: bus_dma.c,v 1.149.2.1 2025/10/01 16:41:13 martin Exp $ */
22

33
/*-
44
* Copyright (c) 1996, 1997, 1998, 2020 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
3636
#include "opt_cputypes.h"
3737

3838
#include <sys/cdefs.h>
39-
__KERNEL_RCSID(0, "$NetBSD: bus_dma.c,v 1.149 2025/03/08 21:00:45 jmcneill Exp $");
39+
__KERNEL_RCSID(0, "$NetBSD: bus_dma.c,v 1.149.2.1 2025/10/01 16:41:13 martin Exp $");
4040

4141
#include <sys/param.h>
4242

@@ -1200,38 +1200,77 @@ _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
12001200
}
12011201
}
12021202

1203-
/* Skip cache frobbing if mapping was COHERENT */
1203+
/*
1204+
* Provide appropriate memory barriers, and skip cache frobbing
1205+
* if mapping is COHERENT.
1206+
*
1207+
* The case of PREREAD is as follows:
1208+
*
1209+
* 1. loads and stores before DMA buffer may be allocated for the purpose
1210+
* 2. bus_dmamap_sync(BUS_DMASYNC_PREREAD)
1211+
* 3. store to register or DMA descriptor to trigger DMA
1212+
*
1213+
* The load/store-before-store ordering is ensured by dma_rw_w().
1214+
*
1215+
* The case of PREWRITE is as follows:
1216+
*
1217+
* 1. stores to DMA buffer. loads can happen later as the buffer is not changed
1218+
* by the device.
1219+
* 2. bus_dmamap_sync(BUS_DMASYNC_PREWRITE)
1220+
* 3. store to register or DMA descriptor to trigger DMA
1221+
*
1222+
* The store-before-store ordering is ensured by dma_w_w().
1223+
*
1224+
* The case of POSTREAD is as follows:
1225+
*
1226+
* 1. load from register or DMA descriptor notifying DMA completion
1227+
* 2. bus_dmamap_sync(BUS_DMASYNC_POSTREAD)
1228+
* 3. loads from DMA buffer to use data, and stores to reuse buffer
1229+
*
1230+
* The stores in (3) will not be speculated and, therefore, don't need
1231+
* specific handling. The load-before-load ordering is ensured by
1232+
* dma_r_r().
1233+
*
1234+
* The case of POSTWRITE is as follows:
1235+
*
1236+
* 1. load from register or DMA descriptor notifying DMA completion
1237+
* 2. bus_dmamap_sync(BUS_DMASYNC_POSTWRITE)
1238+
* 3. loads and stores to reuse buffer
1239+
*
1240+
* The stores in (3) will not be speculated, and the load can happen at any
1241+
* time as the DMA buffer is not changed by the device so no barrier is
1242+
* required.
1243+
*/
1244+
12041245
if ((map->_dm_flags & _BUS_DMAMAP_COHERENT)) {
12051246
switch (ops) {
12061247
case BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE:
1248+
dma_rw_w();
12071249
STAT_INCR(sync_coherent_prereadwrite);
12081250
break;
12091251

12101252
case BUS_DMASYNC_PREREAD:
1253+
dma_rw_w();
12111254
STAT_INCR(sync_coherent_preread);
12121255
break;
12131256

12141257
case BUS_DMASYNC_PREWRITE:
1258+
dma_w_w();
12151259
STAT_INCR(sync_coherent_prewrite);
12161260
break;
12171261

12181262
case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
1263+
dma_r_r();
12191264
STAT_INCR(sync_coherent_postreadwrite);
12201265
break;
12211266

12221267
case BUS_DMASYNC_POSTREAD:
1268+
dma_r_r();
12231269
STAT_INCR(sync_coherent_postread);
12241270
break;
12251271

12261272
/* BUS_DMASYNC_POSTWRITE was already handled as a fastpath */
12271273
}
1228-
/*
1229-
* Drain the write buffer of DMA operators.
1230-
* 1) when cpu->device (prewrite)
1231-
* 2) when device->cpu (postread)
1232-
*/
1233-
if ((pre_ops & BUS_DMASYNC_PREWRITE) || (post_ops & BUS_DMASYNC_POSTREAD))
1234-
cpu_drain_writebuf();
12351274

12361275
/*
12371276
* Only thing left to do for COHERENT mapping is copy from bounce

sys/arch/arm/include/cpufunc.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,20 @@
7575

7676
#endif
7777

78+
#if defined(_ARM_ARCH_8)
79+
#define dma_r_r() dsb(oshld) // actually r_rw
80+
#define dma_w_w() dsb(oshst)
81+
#define dma_rw_w() dsb(osh) // actually rw_rw
82+
#elif defined(_ARM_ARCH_6)
83+
#define dma_r_r() dsb(osh) // actually rw_rw
84+
#define dma_w_w() dsb(oshst)
85+
#define dma_rw_w() dsb(osh) // actually rw_rw
86+
#else
87+
#define dma_r_r() __nothing
88+
#define dma_w_w() cpu_drain_writebuf()
89+
#define dma_rw_w() cpu_drain_writebuf()
90+
#endif
91+
7892
#ifdef __arm__
7993

8094
#ifdef _KERNEL

0 commit comments

Comments
 (0)