Skip to content

Commit ad3e314

Browse files
Pull up following revision(s) (requested by skrll in ticket #1166):
sys/arch/arm/arm32/bus_dma.c: revision 1.142 sys/arch/arm/include/cpufunc.h: revision 1.92 sys/arch/arm/arm32/bus_dma.c: revision 1.150 More KNF arm: Adjust barriers issued in bus_dmamap_sync for the coherent case. PR/59654 This change adjusts the memory barriers issued by bus_dmamap_sync for the coherent case. In the non-coherent case the CPU cache operations are expected to provide any and all required barriers. These barriers are emitted after bouncing for PREWRITE and before bouncing for POSTREAD. A new indirection is introduced to deal with the differences in barrier (dsb) options between Arm V8 and V7, the lack of options in Arm V6, and the need to use cpu_drain_writebuf for all other earlier versions. The Arm V8 Net change is: op old new --------------------- ---------------------- ------------ PREREAD none dsb(osh) PREWRITE cpu_drain_writebuf dsb(oshst) PREREAD|PREWRITE cpu_drain_writebuf dsb(osh) POSTREAD cpu_drain_writebuf dsb(oshld) POSTWRITE none none POSTREAD|POSTWRITE cpu_drain_writebuf dsb(oshld) where cpu_drain_writebuf is a dsb(sy) or CPU equivalent.
1 parent dae87d7 commit ad3e314

2 files changed

Lines changed: 72 additions & 19 deletions

File tree

sys/arch/arm/arm32/bus_dma.c

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* $NetBSD: bus_dma.c,v 1.138 2022/10/11 22:03:37 andvar Exp $ */
1+
/* $NetBSD: bus_dma.c,v 1.138.2.1 2025/10/01 16:54:19 martin Exp $ */
22

33
/*-
44
* Copyright (c) 1996, 1997, 1998, 2020 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
3636
#include "opt_cputypes.h"
3737

3838
#include <sys/cdefs.h>
39-
__KERNEL_RCSID(0, "$NetBSD: bus_dma.c,v 1.138 2022/10/11 22:03:37 andvar Exp $");
39+
__KERNEL_RCSID(0, "$NetBSD: bus_dma.c,v 1.138.2.1 2025/10/01 16:54:19 martin Exp $");
4040

4141
#include <sys/param.h>
4242

@@ -875,7 +875,7 @@ _bus_dmamap_sync_segment(vaddr_t va, paddr_t pa, vsize_t len, int ops,
875875
#endif
876876

877877
switch (ops) {
878-
case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
878+
case BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE:
879879
if (!readonly_p) {
880880
STAT_INCR(sync_prereadwrite);
881881
cpu_dcache_wbinv_range(va, len);
@@ -932,7 +932,7 @@ _bus_dmamap_sync_segment(vaddr_t va, paddr_t pa, vsize_t len, int ops,
932932
* Since these can't be dirty, we can just invalidate them and don't
933933
* have to worry about having to write back their contents.
934934
*/
935-
case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE:
935+
case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
936936
STAT_INCR(sync_postreadwrite);
937937
cpu_dcache_inv_range(va, len);
938938
cpu_sdcache_inv_range(va, pa, len);
@@ -1102,8 +1102,8 @@ _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
11021102
/*
11031103
* Mixing of PRE and POST operations is not allowed.
11041104
*/
1105-
if ((ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE)) != 0 &&
1106-
(ops & (BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE)) != 0)
1105+
if ((ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0 &&
1106+
(ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)) != 0)
11071107
panic("%s: mix PRE and POST", __func__);
11081108

11091109
KASSERTMSG(offset < map->dm_mapsize,
@@ -1121,7 +1121,7 @@ _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
11211121
* here in case a write-back is required by the back-end.
11221122
*
11231123
* PREWRITE -- Write-back the D-cache. Note that if
1124-
* we are doing a PREREAD|PREWRITE, we can collapse
1124+
* we are doing a PREREAD | PREWRITE, we can collapse
11251125
* the whole thing into a single Wb-Inv.
11261126
*
11271127
* POSTREAD -- Re-invalidate the D-cache in case speculative
@@ -1136,9 +1136,9 @@ _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
11361136
const bool bouncing = false;
11371137
#endif
11381138

1139-
const int pre_ops = ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
1139+
const int pre_ops = ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
11401140
#if defined(CPU_CORTEX) || defined(CPU_ARMV8)
1141-
const int post_ops = ops & (BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
1141+
const int post_ops = ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
11421142
#else
11431143
const int post_ops = 0;
11441144
#endif
@@ -1196,38 +1196,77 @@ _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
11961196
}
11971197
}
11981198

1199-
/* Skip cache frobbing if mapping was COHERENT */
1199+
/*
1200+
* Provide appropriate memory barriers, and skip cache frobbing
1201+
* if mapping is COHERENT.
1202+
*
1203+
* The case of PREREAD is as follows:
1204+
*
1205+
* 1. loads and stores before DMA buffer may be allocated for the purpose
1206+
* 2. bus_dmamap_sync(BUS_DMASYNC_PREREAD)
1207+
* 3. store to register or DMA descriptor to trigger DMA
1208+
*
1209+
* The load/store-before-store ordering is ensured by dma_rw_w().
1210+
*
1211+
* The case of PREWRITE is as follows:
1212+
*
1213+
* 1. stores to DMA buffer. loads can happen later as the buffer is not changed
1214+
* by the device.
1215+
* 2. bus_dmamap_sync(BUS_DMASYNC_PREWRITE)
1216+
* 3. store to register or DMA descriptor to trigger DMA
1217+
*
1218+
* The store-before-store ordering is ensured by dma_w_w().
1219+
*
1220+
* The case of POSTREAD is as follows:
1221+
*
1222+
* 1. load from register or DMA descriptor notifying DMA completion
1223+
* 2. bus_dmamap_sync(BUS_DMASYNC_POSTREAD)
1224+
* 3. loads from DMA buffer to use data, and stores to reuse buffer
1225+
*
1226+
* The stores in (3) will not be speculated and, therefore, don't need
1227+
* specific handling. The load-before-load ordering is ensured by
1228+
* dma_r_r().
1229+
*
1230+
* The case of POSTWRITE is as follows:
1231+
*
1232+
* 1. load from register or DMA descriptor notifying DMA completion
1233+
* 2. bus_dmamap_sync(BUS_DMASYNC_POSTWRITE)
1234+
* 3. loads and stores to reuse buffer
1235+
*
1236+
* The stores in (3) will not be speculated, and the load can happen at any
1237+
* time as the DMA buffer is not changed by the device so no barrier is
1238+
* required.
1239+
*/
1240+
12001241
if ((map->_dm_flags & _BUS_DMAMAP_COHERENT)) {
12011242
switch (ops) {
1202-
case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
1243+
case BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE:
1244+
dma_rw_w();
12031245
STAT_INCR(sync_coherent_prereadwrite);
12041246
break;
12051247

12061248
case BUS_DMASYNC_PREREAD:
1249+
dma_rw_w();
12071250
STAT_INCR(sync_coherent_preread);
12081251
break;
12091252

12101253
case BUS_DMASYNC_PREWRITE:
1254+
dma_w_w();
12111255
STAT_INCR(sync_coherent_prewrite);
12121256
break;
12131257

1214-
case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE:
1258+
case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
1259+
dma_r_r();
12151260
STAT_INCR(sync_coherent_postreadwrite);
12161261
break;
12171262

12181263
case BUS_DMASYNC_POSTREAD:
1264+
dma_r_r();
12191265
STAT_INCR(sync_coherent_postread);
12201266
break;
12211267

12221268
/* BUS_DMASYNC_POSTWRITE was already handled as a fastpath */
12231269
}
1224-
/*
1225-
* Drain the write buffer of DMA operators.
1226-
* 1) when cpu->device (prewrite)
1227-
* 2) when device->cpu (postread)
1228-
*/
1229-
if ((pre_ops & BUS_DMASYNC_PREWRITE) || (post_ops & BUS_DMASYNC_POSTREAD))
1230-
cpu_drain_writebuf();
12311270

12321271
/*
12331272
* Only thing left to do for COHERENT mapping is copy from bounce

sys/arch/arm/include/cpufunc.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,20 @@
7575

7676
#endif
7777

78+
#if defined(_ARM_ARCH_8)
79+
#define dma_r_r() dsb(oshld) // actually r_rw
80+
#define dma_w_w() dsb(oshst)
81+
#define dma_rw_w() dsb(osh) // actually rw_rw
82+
#elif defined(_ARM_ARCH_6)
83+
#define dma_r_r() dsb(osh) // actually rw_rw
84+
#define dma_w_w() dsb(oshst)
85+
#define dma_rw_w() dsb(osh) // actually rw_rw
86+
#else
87+
#define dma_r_r() __nothing
88+
#define dma_w_w() cpu_drain_writebuf()
89+
#define dma_rw_w() cpu_drain_writebuf()
90+
#endif
91+
7892
#ifdef __arm__
7993

8094
#ifdef _KERNEL

0 commit comments

Comments
 (0)