
Commit c3c9402

ayalevin123 authored and Saeed Mahameed committed
net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU
Prior to this fix, in Striding RQ mode the driver was vulnerable when receiving packets in the range (stride size - headroom, stride size], where stride size is mtu + headroom + tailroom aligned to the closest power of 2.

Usually this filtering is performed by the HW, except for a few cases:
- Between 2 VFs over the same PF with different MTUs.
- On BlueField, when the host physical function sets a larger MTU than the ARM has configured on its representor and uplink representor.

When the HW filtering is not present, packets larger than MTU might harm the RQ's integrity, with the following impacts:
1) Overflow from one WQE to the next, causing a memory corruption that in most cases is harmless: the write lands in the headroom of the next packet, which will be overwritten by build_skb(). In very rare cases (high stress/load) it is harmful, when the next WQE is not yet reposted and still points to an existing SKB head.
2) Each oversize packet overflows into the headroom of the next WQE. On the last WQE of the WQ, where addresses wrap around, the remaining headroom does not belong to the next WQE but falls outside the memory region range. This results in a HW CQE error that moves the RQ into an error state.

Solution:
Add a page buffer at the end of each WQE to absorb the leak. The maximal overflow size is actually only the headroom, but since all memory units must be of the same size, a full page is used to comply with UMR WQEs. The increase in memory consumption is a single page per RQ.

Initialize the mkey with all MTTs pointing to a default page. When the channels are activated, UMR WQEs will redirect the RX WQEs to the actual memory from the RQ's pool, while the overflow MTTs remain mapped to the default page.

Fixes: 73281b7 ("net/mlx5e: Derive Striding RQ size from MTU")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
1 parent 08a762c commit c3c9402

2 files changed

Lines changed: 58 additions & 5 deletions
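To make the commit message's arithmetic concrete, here is a minimal user-space sketch of the stride-size calculation and the vulnerable packet range it describes. The headroom and tailroom values below are illustrative placeholders, not the driver's exact constants.

/* Sketch only: stride size and the vulnerable range described above. */
#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int x)
{
	unsigned int p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned int mtu = 1500;      /* example MTU */
	unsigned int headroom = 256;  /* placeholder value */
	unsigned int tailroom = 320;  /* placeholder value */
	unsigned int stride = roundup_pow_of_two(mtu + headroom + tailroom);

	/* Packets in (stride - headroom, stride] spill past the stride into
	 * the next WQE's headroom when HW filtering is absent.
	 */
	printf("stride size: %u\n", stride);
	printf("vulnerable packet sizes: (%u, %u]\n", stride - headroom, stride);
	return 0;
}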


drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 7 additions & 1 deletion
@@ -91,7 +91,12 @@ struct page_pool;
 #define MLX5_MPWRQ_PAGES_PER_WQE	BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
+ * WQEs. This page will absorb write overflow by the hardware, when
+ * receiving packets larger than MTU. These oversize packets are
+ * dropped by the driver at a later stage.
+ */
+#define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8))
 #define MLX5E_LOG_ALIGNED_MPWQE_PPW	(ilog2(MLX5E_REQUIRED_WQE_MTTS))
 #define MLX5E_REQUIRED_MTTS(wqes)	(wqes * MLX5E_REQUIRED_WQE_MTTS)
 #define MLX5E_MAX_RQ_NUM_MTTS	\
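The effect of the extra filler page on the per-WQE MTT count can be checked with a quick calculation. The sketch below assumes 64 pages per WQE (the typical value with 4 KB pages); the exact figure depends on the configuration, so treat it as illustrative only.

/* Sketch only: MTT count before and after the "+ 1" filler page. */
#include <stdio.h>

#define ALIGN(x, a)              (((x) + (a) - 1) / (a) * (a))
#define MLX5_MPWRQ_PAGES_PER_WQE 64	/* assumed example value */

int main(void)
{
	printf("old MTTs per WQE: %d\n", ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8));     /* 64 */
	printf("new MTTs per WQE: %d\n", ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)); /* 72 */
	return 0;
}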
@@ -617,6 +622,7 @@ struct mlx5e_rq {
 	u32                    rqn;
 	struct mlx5_core_dev  *mdev;
 	struct mlx5_core_mkey  umr_mkey;
+	struct mlx5e_dma_info  wqe_overflow;
 
 	/* XDP read-mostly */
 	struct xdp_rxq_info    xdp_rxq;

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 51 additions & 4 deletions
@@ -246,12 +246,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 
 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 				 u64 npages, u8 page_shift,
-				 struct mlx5_core_mkey *umr_mkey)
+				 struct mlx5_core_mkey *umr_mkey,
+				 dma_addr_t filler_addr)
 {
-	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	struct mlx5_mtt *mtt;
+	int inlen;
 	void *mkc;
 	u32 *in;
 	int err;
+	int i;
+
+	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
 
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
@@ -271,6 +276,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 	MLX5_SET(mkc, mkc, translations_octword_size,
 		 MLX5_MTT_OCTW(npages));
 	MLX5_SET(mkc, mkc, log_page_size, page_shift);
+	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+		 MLX5_MTT_OCTW(npages));
+
+	/* Initialize the mkey with all MTTs pointing to a default
+	 * page (filler_addr). When the channels are activated, UMR
+	 * WQEs will redirect the RX WQEs to the actual memory from
+	 * the RQ's pool, while the gaps (wqe_overflow) remain mapped
+	 * to the default page.
+	 */
+	mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+	for (i = 0 ; i < npages ; i++)
+		mtt[i].ptag = cpu_to_be64(filler_addr);
 
 	err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
 
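The comment above describes a two-step mapping: at mkey creation every translation entry points at the shared filler page, and only later do UMR WQEs remap the data slots. A conceptual user-space sketch of that idea, with purely illustrative names and addresses:

/* Conceptual sketch only: translation entries all start at one filler
 * address; the data slots are remapped later, the overflow slot is not.
 * Names and values are illustrative, not the driver's.
 */
#include <stdint.h>
#include <stdio.h>

#define ENTRIES_PER_WQE 9	/* example: 8 data pages + 1 overflow filler */

int main(void)
{
	uint64_t filler_addr = 0x1000;	/* pretend DMA address of filler page */
	uint64_t mtt[ENTRIES_PER_WQE];
	int i;

	/* Step 1: at mkey creation, everything maps to the filler page. */
	for (i = 0; i < ENTRIES_PER_WQE; i++)
		mtt[i] = filler_addr;

	/* Step 2: later, UMR WQEs remap only the data slots; the last,
	 * overflow slot keeps pointing at the filler page.
	 */
	for (i = 0; i < ENTRIES_PER_WQE - 1; i++)
		mtt[i] = 0x10000 + 0x1000 * (uint64_t)i; /* pretend pool pages */

	for (i = 0; i < ENTRIES_PER_WQE; i++)
		printf("mtt[%d] -> %#llx\n", i, (unsigned long long)mtt[i]);
	return 0;
}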
@@ -282,7 +299,8 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
 {
 	u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
 
-	return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+	return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey,
+				     rq->wqe_overflow.addr);
 }
 
 static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
@@ -350,6 +368,28 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
 	mlx5e_reporter_rq_cqe_err(rq);
 }
 
+static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+	rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
+	if (!rq->wqe_overflow.page)
+		return -ENOMEM;
+
+	rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
+					     PAGE_SIZE, rq->buff.map_dir);
+	if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
+		__free_page(rq->wqe_overflow.page);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+	dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
+			rq->buff.map_dir);
+	__free_page(rq->wqe_overflow.page);
+}
+
 static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
@@ -409,6 +449,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	if (err)
 		goto err_rq_xdp;
 
+	err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
+	if (err)
+		goto err_rq_wq_destroy;
+
 	rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
 
 	wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
@@ -424,7 +468,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 	err = mlx5e_create_rq_umr_mkey(mdev, rq);
 	if (err)
-		goto err_rq_wq_destroy;
+		goto err_rq_drop_page;
 	rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
 
 	err = mlx5e_rq_alloc_mpwqe_info(rq, c);
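The relabelled goto above follows the usual kernel unwind idiom: once the drop page has been allocated, any later failure must exit through a label that releases it before unwinding earlier resources. A generic user-space sketch of the pattern, with names that are illustrative rather than the driver's:

/* Sketch only: goto-based unwind in reverse order of allocation. */
#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
	void *drop_page, *mkey;

	drop_page = malloc(64);		/* stands in for the overflow drop page */
	if (!drop_page)
		return -1;

	mkey = malloc(64);		/* stands in for the UMR mkey */
	if (!mkey)
		goto err_drop_page;	/* release the earlier allocation first */

	/* ... normal operation would continue here ... */
	free(mkey);
	free(drop_page);
	return 0;

err_drop_page:
	free(drop_page);
	return -1;
}

int main(void)
{
	return setup() ? EXIT_FAILURE : EXIT_SUCCESS;
}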
@@ -548,6 +592,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		kvfree(rq->mpwqe.info);
 err_rq_mkey:
 		mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+err_rq_drop_page:
+		mlx5e_free_mpwqe_rq_drop_page(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		mlx5e_free_di_list(rq);
@@ -582,6 +628,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		kvfree(rq->mpwqe.info);
 		mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+		mlx5e_free_mpwqe_rq_drop_page(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		kvfree(rq->wqe.frags);
