Sync some patches from upstream. The modifications are as follows:
- maintainers: update for hns3 driver
- app/testpmd: add command to flush multicast MAC addresses
- app/testpmd: fix help string
- app/testpmd: fix multicast address pool leak
- net/hns3: optimize SVE Rx performance
- net/hns3: optimize rearm mbuf for SVE Rx
- net/hns3: optimize free mbuf for SVE Tx
- net/hns3: fix order in NEON Rx
- net/hns3: fix traffic management dump text alignment
- net/hns3: fix traffic management thread safety
- net/hns3: fix flushing multicast MAC address
- net/hns3: fix error code for multicast resource
- net/hns3: fix VF default MAC modified when set failed
- net/hns3: fix index to look up table in NEON Rx
- net/hns3: fix non-zero weight for disabled TC
- config/arm: add HiSilicon HIP10

Signed-off-by: Dengdui Huang <huangdengdui@huawei.com>
From 7739ae6472f1dc986ce72d24ff3fcdd1a1eccc3f Mon Sep 17 00:00:00 2001
From: Huisong Li <lihuisong@huawei.com>
Date: Tue, 11 Jul 2023 18:24:45 +0800
Subject: [PATCH 359/366] net/hns3: fix order in NEON Rx

[ upstream commit 7dd439ed998c36c8d0204c436cc656af08cfa5fc ]

This patch reorders the order of the NEON Rx for better maintenance
and easier understanding.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
Cc: stable@dpdk.org

Signed-off-by: Huisong Li <lihuisong@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
---
drivers/net/hns3/hns3_rxtx_vec_neon.h | 78 +++++++++++----------------
1 file changed, 31 insertions(+), 47 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index a20a6b6..1048b9d 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -180,19 +180,12 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);

- /* load 2 mbuf pointer */
- mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
-
bd_vld = vshl_n_u16(bd_vld,
HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
bd_vld = vreinterpret_u16_s16(
vshr_n_s16(vreinterpret_s16_u16(bd_vld),
HNS3_UINT16_BIT - 1));
stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);
-
- /* load 2 mbuf pointer again */
- mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
-
if (likely(stat == 0))
bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
else
@@ -200,20 +193,20 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
if (bd_valid_num == 0)
break;

- /* use offset to control below data load oper ordering */
- offset = rxq->offset_table[bd_valid_num];
+ /* load 4 mbuf pointer */
+ mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
+ mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

- /* store 2 mbuf pointer into rx_pkts */
+ /* store 4 mbuf pointer into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
+ vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

- /* read first two descs */
+ /* use offset to control below data load oper ordering */
+ offset = rxq->offset_table[bd_valid_num];
+
+ /* read 4 descs */
descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));
-
- /* store 2 mbuf pointer into rx_pkts again */
- vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
-
- /* read remains two descs */
descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));

@@ -221,56 +214,47 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
+ pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
+ pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
+ pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
+ pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);

- /* pkt 1,2 convert format from desc to pktmbuf */
+ /* 4 packets convert format from desc to pktmbuf */
pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
+ pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
+ pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);

- /* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
- *(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
- rxq->mbuf_initializer;
- *(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
- rxq->mbuf_initializer;
-
- /* pkt 1,2 remove crc */
+ /* 4 packets remove crc */
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
pkt_mb1 = vreinterpretq_u8_u16(tmp);
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
pkt_mb2 = vreinterpretq_u8_u16(tmp);
+ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+ pkt_mb3 = vreinterpretq_u8_u16(tmp);
+ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+ pkt_mb4 = vreinterpretq_u8_u16(tmp);

- pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
- pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
- pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
- pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
-
- /* pkt 3,4 convert format from desc to pktmbuf */
- pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
- pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
-
- /* pkt 1,2 save to rx_pkts mbuf */
+ /* save packet info to rx_pkts mbuf */
vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
pkt_mb1);
vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
pkt_mb2);
+ vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
+ pkt_mb3);
+ vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
+ pkt_mb4);

- /* pkt 3,4 remove crc */
- tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
- pkt_mb3 = vreinterpretq_u8_u16(tmp);
- tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
- pkt_mb4 = vreinterpretq_u8_u16(tmp);
-
- /* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
+ /* store the first 8 bytes of packets mbuf's rearm_data */
+ *(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
+ rxq->mbuf_initializer;
+ *(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
+ rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
rxq->mbuf_initializer;

- /* pkt 3,4 save to rx_pkts mbuf */
- vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
- pkt_mb3);
- vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
- pkt_mb4);
-
rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);

parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],
--
2.41.0.windows.2