Skip to content

Commit fcd1ecc

Browse files
committed
Merge branch 'cxgb4-ch_ktls-fixes-in-nic-tls-code'
Rohit Maheshwari says: ==================== cxgb4/ch_ktls: Fixes in nic tls code This series helps in fixing multiple nic ktls issues. Series is broken into 12 patches. Patch 1 avoids deciding tls packet based on decrypted bit. If it's a retransmit packet which has tls handshake and finish (for encryption), decrypted bit won't be set there, and so we can't rely on decrypted bit. Patch 2 helps supporting linear skb. SKBs were assumed non-linear. Corrected the length extraction. Patch 3 fixes the checksum offload update in WR. Patch 4 fixes kernel panic happening due to creating new skb for each record. As part of fix driver will use same skb to send out one tls record (partial data) of the same SKB. Patch 5 fixes the problem of skb data length smaller than remaining data of the record. Patch 6 fixes the handling of SKBs which has tls header alone pkt, but not starting from beginning. Patch 7 avoids sending extra data which is used to make a record 16 byte aligned. We don't need to retransmit those extra few bytes. Patch 8 handles the cases where retransmit packet has tls starting exchanges which are prior to tls start marker. Patch 9 fixes the problem of skb free before HW knows about tcp FIN. Patch 10 handles the small packet case which has partial TAG bytes only. HW can't handle those, hence using sw crypto for such pkts. Patch 11 corrects the potential tcb update problem. Patch 12 stops the queue if queue reaches threshold value. v1->v2: - Corrected fixes tag issue. - Marked chcr_ktls_sw_fallback() static. v2->v3: - Replaced GFP_KERNEL with GFP_ATOMIC. - Removed mixed fixes. v3->v4: - Corrected fixes tag issue. v4->v5: - Separated mixed fixes from patch 4. v5->v6: - Fixes tag should be at the end. ==================== Link: https://lore.kernel.org/r/20201109105142.15398-1-rohitm@chelsio.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 3611823 + 83a95df commit fcd1ecc

7 files changed

Lines changed: 478 additions & 228 deletions

File tree

drivers/net/ethernet/chelsio/cxgb4/cxgb4.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,6 +2124,9 @@ void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
21242124
void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
21252125
struct ulptx_sgl *sgl, u64 *end, unsigned int start,
21262126
const dma_addr_t *addr);
2127+
void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
2128+
struct ulptx_sgl *sgl, u64 *end,
2129+
const dma_addr_t *addr, u32 start, u32 send_len);
21272130
void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
21282131
int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
21292132
u16 vlan);

drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3573,6 +3573,8 @@ static int chcr_stats_show(struct seq_file *seq, void *v)
35733573
atomic64_read(&adap->ch_ktls_stats.ktls_tx_complete_pkts));
35743574
seq_printf(seq, "TX trim pkts : %20llu\n",
35753575
atomic64_read(&adap->ch_ktls_stats.ktls_tx_trimmed_pkts));
3576+
seq_printf(seq, "TX sw fallback : %20llu\n",
3577+
atomic64_read(&adap->ch_ktls_stats.ktls_tx_fallback));
35763578
while (i < MAX_NPORTS) {
35773579
ktls_port = &adap->ch_ktls_stats.ktls_port[i];
35783580
seq_printf(seq, "Port %d\n", i);

drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
11761176
txq = netdev_pick_tx(dev, skb, sb_dev);
11771177
if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
11781178
skb->encapsulation ||
1179+
cxgb4_is_ktls_skb(skb) ||
11791180
(proto != IPPROTO_TCP && proto != IPPROTO_UDP))
11801181
txq = txq % pi->nqsets;
11811182

drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ struct ch_ktls_stats_debug {
388388
atomic64_t ktls_tx_retransmit_pkts;
389389
atomic64_t ktls_tx_complete_pkts;
390390
atomic64_t ktls_tx_trimmed_pkts;
391+
atomic64_t ktls_tx_fallback;
391392
};
392393
#endif
393394

@@ -493,6 +494,11 @@ struct cxgb4_uld_info {
493494
#endif
494495
};
495496

497+
static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb)
498+
{
499+
return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
500+
}
501+
496502
void cxgb4_uld_enable(struct adapter *adap);
497503
void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
498504
int cxgb4_unregister_uld(enum cxgb4_uld type);

drivers/net/ethernet/chelsio/cxgb4/sge.c

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
890890
}
891891
EXPORT_SYMBOL(cxgb4_write_sgl);
892892

893+
/* cxgb4_write_partial_sgl - populate SGL for partial packet
894+
* @skb: the packet
895+
* @q: the Tx queue we are writing into
896+
* @sgl: starting location for writing the SGL
897+
* @end: points right after the end of the SGL
898+
* @addr: the list of bus addresses for the SGL elements
899+
* @start: start offset in the SKB where partial data starts
900+
* @len: length of data from @start to send out
901+
*
902+
* This API will handle sending out partial data of a skb if required.
903+
* Unlike cxgb4_write_sgl, @start can be any offset into the skb data,
904+
* and @len will decide how much data after @start offset to send out.
905+
*/
906+
void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
907+
struct ulptx_sgl *sgl, u64 *end,
908+
const dma_addr_t *addr, u32 start, u32 len)
909+
{
910+
struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to;
911+
u32 frag_size, skb_linear_data_len = skb_headlen(skb);
912+
struct skb_shared_info *si = skb_shinfo(skb);
913+
u8 i = 0, frag_idx = 0, nfrags = 0;
914+
skb_frag_t *frag;
915+
916+
/* Fill the first SGL either from linear data or from partial
917+
* frag based on @start.
918+
*/
919+
if (unlikely(start < skb_linear_data_len)) {
920+
frag_size = min(len, skb_linear_data_len - start);
921+
sgl->len0 = htonl(frag_size);
922+
sgl->addr0 = cpu_to_be64(addr[0] + start);
923+
len -= frag_size;
924+
nfrags++;
925+
} else {
926+
start -= skb_linear_data_len;
927+
frag = &si->frags[frag_idx];
928+
frag_size = skb_frag_size(frag);
929+
/* find the first frag */
930+
while (start >= frag_size) {
931+
start -= frag_size;
932+
frag_idx++;
933+
frag = &si->frags[frag_idx];
934+
frag_size = skb_frag_size(frag);
935+
}
936+
937+
frag_size = min(len, skb_frag_size(frag) - start);
938+
sgl->len0 = cpu_to_be32(frag_size);
939+
sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start);
940+
len -= frag_size;
941+
nfrags++;
942+
frag_idx++;
943+
}
944+
945+
/* If the entire partial data fit in one SGL, then send it out
946+
* now.
947+
*/
948+
if (!len)
949+
goto done;
950+
951+
/* Most of the complexity below deals with the possibility we hit the
952+
* end of the queue in the middle of writing the SGL. For this case
953+
* only we create the SGL in a temporary buffer and then copy it.
954+
*/
955+
to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;
956+
957+
/* If the skb couldn't fit in first SGL completely, fill the
958+
* rest of the frags in subsequent SGLs. Note that each SGL
959+
* pair can store 2 frags.
960+
*/
961+
while (len) {
962+
frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
963+
to->len[i & 1] = cpu_to_be32(frag_size);
964+
to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]);
965+
if (i && (i & 1))
966+
to++;
967+
nfrags++;
968+
frag_idx++;
969+
i++;
970+
len -= frag_size;
971+
}
972+
973+
/* If we ended in an odd boundary, then set the second SGL's
974+
* length in the pair to 0.
975+
*/
976+
if (i & 1)
977+
to->len[1] = cpu_to_be32(0);
978+
979+
/* Copy from temporary buffer to Tx ring, in case we hit the
980+
* end of the queue in the middle of writing the SGL.
981+
*/
982+
if (unlikely((u8 *)end > (u8 *)q->stat)) {
983+
u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;
984+
985+
if (likely(part0))
986+
memcpy(sgl->sge, buf, part0);
987+
part1 = (u8 *)end - (u8 *)q->stat;
988+
memcpy(q->desc, (u8 *)buf + part0, part1);
989+
end = (void *)q->desc + part1;
990+
}
991+
992+
/* 0-pad to multiple of 16 */
993+
if ((uintptr_t)end & 8)
994+
*end = 0;
995+
done:
996+
sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
997+
ULPTX_NSGE_V(nfrags));
998+
}
999+
EXPORT_SYMBOL(cxgb4_write_partial_sgl);
1000+
8931001
/* This function copies 64 byte coalesced work request to
8941002
* memory mapped BAR2 space. For coalesced WR SGE fetches
8951003
* data from the FIFO instead of from Host.
@@ -1422,7 +1530,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
14221530
#endif /* CHELSIO_IPSEC_INLINE */
14231531

14241532
#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE)
1425-
if (skb->decrypted)
1533+
if (cxgb4_is_ktls_skb(skb) &&
1534+
(skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb))))
14261535
return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev);
14271536
#endif /* CHELSIO_TLS_DEVICE */
14281537

0 commit comments

Comments
 (0)