expand-data-recv-buff

(cherry picked from commit c54a58362219dd7cc043a743b82146d857af0214)
This commit is contained in:
wu-changsheng 2022-12-20 16:05:15 +08:00 committed by openeuler-sync-bot
parent fbef78b17b
commit 1a53a32b0d
3 changed files with 528 additions and 1 deletions

View File

@ -0,0 +1,165 @@
From bd6d329cc57086996302fdd46fbd434f945ecd26 Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Tue, 20 Dec 2022 15:39:39 +0800
Subject: [PATCH 1/2] optimite-send-pkts-dul-index
---
src/common/gazelle_dfx_msg.h | 2 +-
src/lstack/core/lstack_protocol_stack.c | 23 +++++++++++-----
src/lstack/include/lstack_protocol_stack.h | 8 ++++--
src/lstack/netif/lstack_ethdev.c | 31 ++++++++++++++--------
src/ltran/ltran_dfx.c | 2 +-
5 files changed, 44 insertions(+), 22 deletions(-)
diff --git a/src/common/gazelle_dfx_msg.h b/src/common/gazelle_dfx_msg.h
index d14f1b9..83b6fe9 100644
--- a/src/common/gazelle_dfx_msg.h
+++ b/src/common/gazelle_dfx_msg.h
@@ -55,7 +55,7 @@ enum GAZELLE_LATENCY_TYPE {
struct gazelle_stack_stat {
uint64_t wakeup_events;
uint64_t write_lwip_cnt;
- uint64_t send_self_rpc;
+ uint64_t send_pkts_fail;
uint64_t read_lwip_drop;
uint64_t read_lwip_cnt;
uint64_t rx_allocmbuf_fail;
diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c
index 870e496..16b124e 100644
--- a/src/lstack/core/lstack_protocol_stack.c
+++ b/src/lstack/core/lstack_protocol_stack.c
@@ -413,20 +413,29 @@ static void wakeup_kernel_event(struct protocol_stack *stack)
stack->kernel_event_num = 0;
}
-static void stack_send_pkts(struct protocol_stack *stack)
+void stack_send_pkts(struct protocol_stack *stack)
{
- if (stack->send_cnt == 0) {
+ uint32_t send_num = stack->send_end - stack->send_start;
+
+ if (send_num == 0) {
return;
}
- uint32_t sent_pkts = stack->dev_ops.tx_xmit(stack, stack->send_pkts, stack->send_cnt);
- if (sent_pkts < stack->send_cnt && sent_pkts != 0) {
- for (uint32_t i = sent_pkts; i < stack->send_cnt; i++) {
- stack->send_pkts[i - sent_pkts] = stack->send_pkts[i];
+ uint32_t start = stack->send_start & STACK_SEND_MASK;
+ uint32_t end = stack->send_end & STACK_SEND_MASK;
+ uint32_t sent_pkts = 0;
+
+ if (start < end) {
+ sent_pkts = stack->dev_ops.tx_xmit(stack, &stack->send_pkts[start], send_num);
+ } else {
+ send_num = STACK_SEND_MAX - start;
+ sent_pkts = stack->dev_ops.tx_xmit(stack, &stack->send_pkts[start], send_num);
+ if (sent_pkts == send_num) {
+ sent_pkts += stack->dev_ops.tx_xmit(stack, stack->send_pkts, end);
}
}
- stack->send_cnt -= sent_pkts;
+ stack->send_start += sent_pkts;
stack->stats.tx += sent_pkts;
}
diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h
index 9463707..c706afc 100644
--- a/src/lstack/include/lstack_protocol_stack.h
+++ b/src/lstack/include/lstack_protocol_stack.h
@@ -29,7 +29,8 @@
#define SOCK_SEND_RING_SIZE (32)
#define SOCK_SEND_REPLENISH_THRES (16)
#define WAKEUP_MAX_NUM (32)
-#define STACK_SEND_MAX RTE_TEST_TX_DESC_DEFAULT
+#define STACK_SEND_MAX 2048
+#define STACK_SEND_MASK (STACK_SEND_MAX - 1)
struct rte_mempool;
struct rte_ring;
@@ -66,7 +67,8 @@ struct protocol_stack {
uint32_t rx_ring_used;
uint32_t tx_ring_used;
- uint16_t send_cnt;
+ uint32_t send_start;
+ uint32_t send_end;
struct rte_mbuf *send_pkts[STACK_SEND_MAX];
struct rte_mbuf *pkts[RTE_TEST_RX_DESC_DEFAULT];
struct list_node recv_list;
@@ -131,6 +133,8 @@ int32_t stack_broadcast_accept4(int32_t fd, struct sockaddr *addr, socklen_t *ad
struct wakeup_poll;
void stack_broadcast_clean_epoll(struct wakeup_poll *wakeup);
+void stack_send_pkts(struct protocol_stack *stack);
+
struct rpc_msg;
void stack_clean_epoll(struct rpc_msg *msg);
void stack_arp(struct rpc_msg *msg);
diff --git a/src/lstack/netif/lstack_ethdev.c b/src/lstack/netif/lstack_ethdev.c
index d870dd3..5729ebf 100644
--- a/src/lstack/netif/lstack_ethdev.c
+++ b/src/lstack/netif/lstack_ethdev.c
@@ -147,6 +147,25 @@ int32_t gazelle_eth_dev_poll(struct protocol_stack *stack, bool use_ltran_flag,
return nr_pkts;
}
+static void add_send_pkt(struct protocol_stack *stack, struct rte_mbuf *mbuf)
+{
+ if (stack->send_end + 1 != stack->send_start) {
+ stack->send_pkts[stack->send_end & STACK_SEND_MASK] = mbuf;
+ stack->send_end++;
+ return;
+ }
+
+ stack->stats.send_pkts_fail++;
+ do {
+ stack_send_pkts(stack);
+ if (stack->send_end + 1 != stack->send_start) {
+ stack->send_pkts[stack->send_end & STACK_SEND_MASK] = mbuf;
+ stack->send_end++;
+ return;
+ }
+ } while (1);
+}
+
static err_t eth_dev_output(struct netif *netif, struct pbuf *pbuf)
{
struct protocol_stack *stack = get_protocol_stack();
@@ -197,17 +216,7 @@ static err_t eth_dev_output(struct netif *netif, struct pbuf *pbuf)
pbuf = pbuf->next;
}
- if (stack->send_cnt + 1 < STACK_SEND_MAX) {
- stack->send_pkts[stack->send_cnt++] = first_mbuf;
- } else {
- uint32_t sent_pkts = stack->dev_ops.tx_xmit(stack, &first_mbuf, 1);
- stack->stats.tx += sent_pkts;
- if (sent_pkts < 1) {
- stack->stats.tx_drop++;
- rte_pktmbuf_free(first_mbuf);
- return ERR_MEM;
- }
- }
+ add_send_pkt(stack, first_mbuf);
return ERR_OK;
}
diff --git a/src/ltran/ltran_dfx.c b/src/ltran/ltran_dfx.c
index 3801f19..651f279 100644
--- a/src/ltran/ltran_dfx.c
+++ b/src/ltran/ltran_dfx.c
@@ -579,7 +579,7 @@ static void show_lstack_stats(struct gazelle_stack_dfx_data *lstack_stat)
printf("call_msg: %-19"PRIu64" ", lstack_stat->data.pkts.call_msg_cnt);
printf("call_alloc_fail: %-12"PRIu64" ", lstack_stat->data.pkts.call_alloc_fail);
printf("call_null: %-18"PRIu64" \n", lstack_stat->data.pkts.stack_stat.call_null);
- printf("send_self_rpc: %-14"PRIu64" \n", lstack_stat->data.pkts.stack_stat.send_self_rpc);
+ printf("send_pkts_fail: %-13"PRIu64" \n", lstack_stat->data.pkts.stack_stat.send_pkts_fail);
}
static void gazelle_print_lstack_stat_detail(struct gazelle_stack_dfx_data *lstack_stat,
--
2.23.0

View File

@ -0,0 +1,357 @@
From da54963163baf9213c8cd34da6ec3c533ab1ef9d Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Tue, 20 Dec 2022 15:42:33 +0800
Subject: [PATCH 2/2] expand-data-recv-buff
---
src/common/dpdk_common.h | 2 +-
src/lstack/core/lstack_lwip.c | 202 ++++++++++++++++++++++++-------
src/lstack/include/lstack_dpdk.h | 2 +
3 files changed, 164 insertions(+), 42 deletions(-)
diff --git a/src/common/dpdk_common.h b/src/common/dpdk_common.h
index 63d651d..c93f506 100644
--- a/src/common/dpdk_common.h
+++ b/src/common/dpdk_common.h
@@ -193,7 +193,7 @@ static __rte_always_inline uint32_t gazelle_ring_read(struct rte_ring *r, void *
__rte_ring_dequeue_elems(r, prod, obj_table, sizeof(void *), n);
- r->prod.head = prod + n;
+ __atomic_store_n(&r->prod.head, prod + n, __ATOMIC_RELEASE);
return n;
}
diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c
index 0b9b684..32d21b6 100644
--- a/src/lstack/core/lstack_lwip.c
+++ b/src/lstack/core/lstack_lwip.c
@@ -83,6 +83,11 @@ static void reset_sock_data(struct lwip_sock *sock)
sock->send_lastdata = NULL;
}
+ if (sock->lwip_lastdata) {
+ free_list_pbuf(sock->lwip_lastdata);
+ sock->lwip_lastdata = NULL;
+ }
+
if (sock->send_pre_del) {
pbuf_free(sock->send_pre_del);
sock->send_pre_del = NULL;
@@ -95,6 +100,7 @@ static void reset_sock_data(struct lwip_sock *sock)
sock->events = 0;
sock->in_send = 0;
sock->remain_len = 0;
+ sock->read_wait = false;
if (sock->recv_lastdata) {
pbuf_free(sock->recv_lastdata);
@@ -185,7 +191,6 @@ void gazelle_init_sock(int32_t fd)
init_list_node_null(&sock->recv_list);
init_list_node_null(&sock->event_list);
init_list_node_null(&sock->send_list);
- pthread_spin_init(&sock->sock_lock, PTHREAD_PROCESS_PRIVATE);
}
void gazelle_clean_sock(int32_t fd)
@@ -207,7 +212,6 @@ void gazelle_clean_sock(int32_t fd)
list_del_node_null(&sock->recv_list);
list_del_node_null(&sock->send_list);
- pthread_spin_destroy(&sock->sock_lock);
}
void gazelle_free_pbuf(struct pbuf *pbuf)
@@ -636,64 +640,166 @@ static inline void free_recv_ring_readover(struct rte_ring *ring)
}
}
-ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags)
+static inline struct pbuf *gazelle_ring_enqueuelast(struct rte_ring *r)
{
- if (sock->conn->recvmbox == NULL) {
- return 0;
+ struct pbuf *last_pbuf = NULL;
+ volatile uint32_t head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE);
+ uint32_t last = r->cons.head - 1;
+ if (last == head || last - head > r->capacity) {
+ return NULL;
}
- free_recv_ring_readover(sock->recv_ring);
+ __rte_ring_dequeue_elems(r, last, (void **)&last_pbuf, sizeof(void *), 1);
+ __atomic_store_n(&last_pbuf->in_write, 1, __ATOMIC_RELEASE);
- uint32_t free_count = gazelle_ring_free_count(sock->recv_ring);
- if (free_count == 0) {
- GAZELLE_RETURN(EAGAIN);
+ rte_mb();
+
+ head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE);
+ if (last == head || last - head > r->capacity) {
+ __atomic_store_n(&last_pbuf->in_write, 0, __ATOMIC_RELEASE);
+ return NULL;
}
- uint32_t data_count = rte_ring_count(sock->conn->recvmbox->ring);
- uint32_t read_num = LWIP_MIN(free_count, data_count);
- read_num = LWIP_MIN(read_num, SOCK_RECV_RING_SIZE);
- struct pbuf *pbufs[SOCK_RECV_RING_SIZE];
- uint32_t read_count = 0;
- ssize_t recv_len = 0;
+ return last_pbuf;
+}
+
+static inline struct pbuf *pbuf_last(struct pbuf *pbuf)
+{
+ while (pbuf->next) {
+ pbuf = pbuf->next;
+ }
+ return pbuf;
+}
+
+static struct pbuf *merge_pbufs(struct pbuf *pbufs[], uint32_t data_count, uint32_t data_len)
+{
+ struct pbuf *pre_last = (pbufs[0]->last) ? pbufs[0]->last : pbuf_last(pbufs[0]);
+
+ if (data_count <= 1) {
+ pbufs[0]->last = pre_last;
+ return pbufs[0];
+ }
+
+ for (uint32_t i = 1; i < data_count; i++) {
+ pre_last->next = pbufs[i];
+ pre_last = pbuf_last(pbufs[i]);
+ }
+
+ pbufs[0]->tot_len = data_len;
+ pbufs[0]->last = pre_last;
+
+ return pbufs[0];
+}
- for (uint32_t i = 0; i < read_num; i++) {
+static int32_t get_lwip_pbufs(struct lwip_sock *sock, struct pbuf *pbufs[], uint32_t *data_count, u8_t apiflags)
+{
+ uint32_t data_len = 0;
+
+ for (uint32_t i = 0; i < *data_count; i++) {
err_t err = netconn_recv_tcp_pbuf_flags(sock->conn, &pbufs[i], apiflags);
if (err != ERR_OK) {
- if (recv_len > 0) {
+ *data_count = i;
+ if (data_len > 0) {
/* already received data, return that (this trusts in getting the same error from
netconn layer again next time netconn_recv is called) */
break;
}
-
- return (err == ERR_CLSD) ? 0 : -1;
+ return (err == ERR_CLSD) ? -1 : 0;
}
- recv_len += pbufs[i]->tot_len;
- read_count++;
+ pbufs[i]->last = NULL;
+ pbufs[i]->in_write = 0;
+ data_len += pbufs[i]->tot_len;
/* once we have some data to return, only add more if we don't need to wait */
apiflags |= NETCONN_DONTBLOCK | NETCONN_NOFIN;
}
- if (!(flags & MSG_PEEK)) {
- uint32_t enqueue_num = gazelle_ring_sp_enqueue(sock->recv_ring, (void **)pbufs, read_count);
- for (uint32_t i = enqueue_num; i < read_count; i++) {
- /* update receive window */
- tcp_recved(sock->conn->pcb.tcp, pbufs[i]->tot_len);
- pbuf_free(pbufs[i]);
- sock->stack->stats.read_lwip_drop++;
- }
+ return (int32_t)data_len;
+}
+
+static void put_pbufs_into_recv_ring(struct lwip_sock *sock, struct pbuf *pbufs[],
+ uint32_t data_count, uint32_t data_len)
+{
+ uint32_t free_count = gazelle_ring_free_count(sock->recv_ring);
+
+ if (data_count <= free_count) {
+ (void)gazelle_ring_sp_enqueue(sock->recv_ring, (void **)pbufs, data_count);
+ return;
}
- for (uint32_t i = 0; get_protocol_stack_group()->latency_start && i < read_count; i++) {
- calculate_lstack_latency(&sock->stack->latency, pbufs[i], GAZELLE_LATENCY_LWIP);
+ struct pbuf *new_pbuf = merge_pbufs(pbufs, data_count, data_len);
+
+ if (free_count) {
+ (void)gazelle_ring_sp_enqueue(sock->recv_ring, (void **)&new_pbuf, 1);
+ return;
+ }
+
+ struct pbuf *last_pbuf = gazelle_ring_enqueuelast(sock->recv_ring);
+ if (last_pbuf == NULL) {
+ sock->lwip_lastdata = new_pbuf;
+ return;
}
- sock->stack->stats.read_lwip_cnt += read_count;
- if (recv_len == 0) {
+ if (last_pbuf->last == NULL) {
+ last_pbuf->last = pbuf_last(last_pbuf);
+ }
+ last_pbuf->last->next = new_pbuf;
+ last_pbuf->tot_len += new_pbuf->tot_len;
+ last_pbuf->last = new_pbuf->last;
+ gazelle_ring_lastover(last_pbuf);
+
+ if (last_pbuf->tot_len > SOCK_READ_MAXLEN) {
+ sock->read_wait = true;
+ }
+}
+
+ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags)
+{
+ if (sock->conn->recvmbox == NULL) {
+ return 0;
+ }
+
+ free_recv_ring_readover(sock->recv_ring);
+
+ if (sock->read_wait) {
GAZELLE_RETURN(EAGAIN);
}
- return recv_len;
+
+ struct pbuf *pbufs[SOCK_RECV_RING_SIZE];
+ uint32_t data_count = rte_ring_count(sock->conn->recvmbox->ring);
+ int32_t data_len = get_lwip_pbufs(sock, pbufs, &data_count, apiflags);
+ if (unlikely(data_len < 0)) {
+ /* ERR_CLSD */
+ return 0;
+ } else if (unlikely(data_len == 0) && !sock->lwip_lastdata) {
+ GAZELLE_RETURN(EAGAIN);
+ }
+
+ if (get_protocol_stack_group()->latency_start) {
+ for (uint32_t i = 0; i < data_count; i++) {
+ calculate_lstack_latency(&sock->stack->latency, pbufs[i], GAZELLE_LATENCY_LWIP);
+ }
+ }
+
+ if (data_count) {
+ uint32_t last_len = 0;
+ if (sock->lwip_lastdata) {
+ last_len = sock->lwip_lastdata->tot_len;
+ sock->lwip_lastdata->last->next = pbufs[0];
+ sock->lwip_lastdata->tot_len += pbufs[0]->tot_len;
+ sock->lwip_lastdata->last = pbuf_last(pbufs[0]);
+ pbufs[0] = sock->lwip_lastdata;
+ sock->lwip_lastdata = NULL;
+ }
+ put_pbufs_into_recv_ring(sock, pbufs, data_count, data_len + last_len);
+ } else {
+ put_pbufs_into_recv_ring(sock, &sock->lwip_lastdata, 1, sock->lwip_lastdata->tot_len);
+ sock->lwip_lastdata = NULL;
+ }
+ sock->stack->stats.read_lwip_cnt += data_count;
+
+ return data_len;
}
static int32_t check_msg_vaild(const struct msghdr *message)
@@ -725,9 +831,9 @@ ssize_t recvmsg_from_stack(int32_t s, struct msghdr *message, int32_t flags)
}
for (int32_t i = 0; i < message->msg_iovlen; i++) {
- if (message->msg_iov[i].iov_len == 0){
- continue;
- }
+ if (message->msg_iov[i].iov_len == 0) {
+ continue;
+ }
ssize_t recvd_local = read_stack_data(s, message->msg_iov[i].iov_base, message->msg_iov[i].iov_len, flags);
if (recvd_local > 0) {
@@ -828,7 +934,7 @@ static inline void del_data_in_event(struct lwip_sock *sock)
static struct pbuf *pbuf_free_partial(struct pbuf *pbuf, uint16_t free_len)
{
- uint16_t tot_len = pbuf->tot_len - free_len;
+ uint32_t tot_len = pbuf->tot_len - free_len;
while (free_len && pbuf) {
if (free_len >= pbuf->len) {
@@ -840,7 +946,9 @@ static struct pbuf *pbuf_free_partial(struct pbuf *pbuf, uint16_t free_len)
}
}
- pbuf->tot_len = tot_len;
+ if (pbuf) {
+ pbuf->tot_len = tot_len;
+ }
return pbuf;
}
@@ -849,7 +957,7 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
size_t recv_left = len;
struct pbuf *pbuf = NULL;
ssize_t recvd = 0;
- uint16_t copy_len;
+ uint32_t copy_len;
struct lwip_sock *sock = get_socket_by_fd(fd);
bool latency_enable = get_protocol_stack_group()->latency_start;
@@ -870,8 +978,15 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
break;
}
}
+ if (__atomic_load_n(&pbuf->in_write, __ATOMIC_ACQUIRE)) {
+ sock->recv_lastdata = pbuf;
+ break;
+ }
- copy_len = (recv_left > pbuf->tot_len) ? pbuf->tot_len : (uint16_t)recv_left;
+ copy_len = (recv_left > pbuf->tot_len) ? pbuf->tot_len : recv_left;
+ if (copy_len > UINT16_MAX) {
+ copy_len = UINT16_MAX;
+ }
pbuf_copy_partial(pbuf, (char *)buf + recvd, copy_len, 0);
recvd += copy_len;
@@ -879,6 +994,7 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
if (pbuf->tot_len > copy_len) {
sock->recv_lastdata = pbuf_free_partial(pbuf, copy_len);
+ break;
} else {
if (sock->wakeup) {
sock->wakeup->stat.app_read_cnt += 1;
@@ -890,6 +1006,10 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
}
}
+ if (sock->read_wait) {
+ sock->read_wait = false;
+ }
+
/* rte_ring_count reduce lock */
if (sock->wakeup && sock->wakeup->type == WAKEUP_EPOLL && (sock->events & EPOLLIN)) {
del_data_in_event(sock);
diff --git a/src/lstack/include/lstack_dpdk.h b/src/lstack/include/lstack_dpdk.h
index c3bc527..ac068b8 100644
--- a/src/lstack/include/lstack_dpdk.h
+++ b/src/lstack/include/lstack_dpdk.h
@@ -24,6 +24,8 @@
#define MAX_PACKET_SZ 2048
+#define SOCK_READ_MAXLEN 0x200000
+
#define RING_SIZE(x) ((x) - 1)
#define MBUF_SZ (MAX_PACKET_SZ + RTE_PKTMBUF_HEADROOM)
--
2.23.0

View File

@ -2,7 +2,7 @@
Name: gazelle
Version: 1.0.1
Release: 37
Release: 38
Summary: gazelle is a high performance user-mode stack
License: MulanPSL-2.0
URL: https://gitee.com/openeuler/gazelle
@ -180,6 +180,8 @@ Patch9162: 0162-remove-mbuf-reserve-in-mbuf-alloc.patch
Patch9163: 0163-pkts-bulk-send-to-nic.patch
Patch9164: 0164-rpc-dont-send.patch
Patch9165: 0165-recv-pbuf-free-timely.patch
Patch9166: 0166-optimite-send-pkts-dul-index.patch
Patch9167: 0167-expand-data-recv-buff.patch
%description
%{name} is a high performance user-mode stack.
@ -220,6 +222,9 @@ install -Dpm 0640 %{_builddir}/%{name}-%{version}/src/ltran/ltran.conf %{b
%config(noreplace) %{conf_path}/ltran.conf
%changelog
* Tue Dec 20 2022 wuchangsheng <wuchangsheng2@huawei.com> - 1.0.1-38
- optimite recv data buff and send pkts index
* Sun Dec 18 2022 wuchangsheng <wuchangsheng2@huawei.com> - 1.0.1-37
- pkts-bulk-send-to-nic and rpc-dont-send