diff --git a/0166-optimite-send-pkts-dul-index.patch b/0166-optimite-send-pkts-dul-index.patch new file mode 100644 index 0000000..f95b3ca --- /dev/null +++ b/0166-optimite-send-pkts-dul-index.patch @@ -0,0 +1,165 @@ +From bd6d329cc57086996302fdd46fbd434f945ecd26 Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Tue, 20 Dec 2022 15:39:39 +0800 +Subject: [PATCH 1/2] optimite-send-pkts-dul-index + +--- + src/common/gazelle_dfx_msg.h | 2 +- + src/lstack/core/lstack_protocol_stack.c | 23 +++++++++++----- + src/lstack/include/lstack_protocol_stack.h | 8 ++++-- + src/lstack/netif/lstack_ethdev.c | 31 ++++++++++++++-------- + src/ltran/ltran_dfx.c | 2 +- + 5 files changed, 44 insertions(+), 22 deletions(-) + +diff --git a/src/common/gazelle_dfx_msg.h b/src/common/gazelle_dfx_msg.h +index d14f1b9..83b6fe9 100644 +--- a/src/common/gazelle_dfx_msg.h ++++ b/src/common/gazelle_dfx_msg.h +@@ -55,7 +55,7 @@ enum GAZELLE_LATENCY_TYPE { + struct gazelle_stack_stat { + uint64_t wakeup_events; + uint64_t write_lwip_cnt; +- uint64_t send_self_rpc; ++ uint64_t send_pkts_fail; + uint64_t read_lwip_drop; + uint64_t read_lwip_cnt; + uint64_t rx_allocmbuf_fail; +diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c +index 870e496..16b124e 100644 +--- a/src/lstack/core/lstack_protocol_stack.c ++++ b/src/lstack/core/lstack_protocol_stack.c +@@ -413,20 +413,29 @@ static void wakeup_kernel_event(struct protocol_stack *stack) + stack->kernel_event_num = 0; + } + +-static void stack_send_pkts(struct protocol_stack *stack) ++void stack_send_pkts(struct protocol_stack *stack) + { +- if (stack->send_cnt == 0) { ++ uint32_t send_num = stack->send_end - stack->send_start; ++ ++ if (send_num == 0) { + return; + } + +- uint32_t sent_pkts = stack->dev_ops.tx_xmit(stack, stack->send_pkts, stack->send_cnt); +- if (sent_pkts < stack->send_cnt && sent_pkts != 0) { +- for (uint32_t i = sent_pkts; i < stack->send_cnt; i++) { +- stack->send_pkts[i - sent_pkts] = stack->send_pkts[i]; ++ uint32_t start = stack->send_start & STACK_SEND_MASK; ++ uint32_t end = stack->send_end & STACK_SEND_MASK; ++ uint32_t sent_pkts = 0; ++ ++ if (start < end) { ++ sent_pkts = stack->dev_ops.tx_xmit(stack, &stack->send_pkts[start], send_num); ++ } else { ++ send_num = STACK_SEND_MAX - start; ++ sent_pkts = stack->dev_ops.tx_xmit(stack, &stack->send_pkts[start], send_num); ++ if (sent_pkts == send_num) { ++ sent_pkts += stack->dev_ops.tx_xmit(stack, stack->send_pkts, end); + } + } + +- stack->send_cnt -= sent_pkts; ++ stack->send_start += sent_pkts; + stack->stats.tx += sent_pkts; + } + +diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h +index 9463707..c706afc 100644 +--- a/src/lstack/include/lstack_protocol_stack.h ++++ b/src/lstack/include/lstack_protocol_stack.h +@@ -29,7 +29,8 @@ + #define SOCK_SEND_RING_SIZE (32) + #define SOCK_SEND_REPLENISH_THRES (16) + #define WAKEUP_MAX_NUM (32) +-#define STACK_SEND_MAX RTE_TEST_TX_DESC_DEFAULT ++#define STACK_SEND_MAX 2048 ++#define STACK_SEND_MASK (STACK_SEND_MAX - 1) + + struct rte_mempool; + struct rte_ring; +@@ -66,7 +67,8 @@ struct protocol_stack { + uint32_t rx_ring_used; + uint32_t tx_ring_used; + +- uint16_t send_cnt; ++ uint32_t send_start; ++ uint32_t send_end; + struct rte_mbuf *send_pkts[STACK_SEND_MAX]; + struct rte_mbuf *pkts[RTE_TEST_RX_DESC_DEFAULT]; + struct list_node recv_list; +@@ -131,6 +133,8 @@ int32_t stack_broadcast_accept4(int32_t fd, struct sockaddr *addr, socklen_t *ad + struct wakeup_poll; + 
void stack_broadcast_clean_epoll(struct wakeup_poll *wakeup); + ++void stack_send_pkts(struct protocol_stack *stack); ++ + struct rpc_msg; + void stack_clean_epoll(struct rpc_msg *msg); + void stack_arp(struct rpc_msg *msg); +diff --git a/src/lstack/netif/lstack_ethdev.c b/src/lstack/netif/lstack_ethdev.c +index d870dd3..5729ebf 100644 +--- a/src/lstack/netif/lstack_ethdev.c ++++ b/src/lstack/netif/lstack_ethdev.c +@@ -147,6 +147,25 @@ int32_t gazelle_eth_dev_poll(struct protocol_stack *stack, bool use_ltran_flag, + return nr_pkts; + } + ++static void add_send_pkt(struct protocol_stack *stack, struct rte_mbuf *mbuf) ++{ ++ if (stack->send_end + 1 != stack->send_start) { ++ stack->send_pkts[stack->send_end & STACK_SEND_MASK] = mbuf; ++ stack->send_end++; ++ return; ++ } ++ ++ stack->stats.send_pkts_fail++; ++ do { ++ stack_send_pkts(stack); ++ if (stack->send_end + 1 != stack->send_start) { ++ stack->send_pkts[stack->send_end & STACK_SEND_MASK] = mbuf; ++ stack->send_end++; ++ return; ++ } ++ } while (1); ++} ++ + static err_t eth_dev_output(struct netif *netif, struct pbuf *pbuf) + { + struct protocol_stack *stack = get_protocol_stack(); +@@ -197,17 +216,7 @@ static err_t eth_dev_output(struct netif *netif, struct pbuf *pbuf) + pbuf = pbuf->next; + } + +- if (stack->send_cnt + 1 < STACK_SEND_MAX) { +- stack->send_pkts[stack->send_cnt++] = first_mbuf; +- } else { +- uint32_t sent_pkts = stack->dev_ops.tx_xmit(stack, &first_mbuf, 1); +- stack->stats.tx += sent_pkts; +- if (sent_pkts < 1) { +- stack->stats.tx_drop++; +- rte_pktmbuf_free(first_mbuf); +- return ERR_MEM; +- } +- } ++ add_send_pkt(stack, first_mbuf); + + return ERR_OK; + } +diff --git a/src/ltran/ltran_dfx.c b/src/ltran/ltran_dfx.c +index 3801f19..651f279 100644 +--- a/src/ltran/ltran_dfx.c ++++ b/src/ltran/ltran_dfx.c +@@ -579,7 +579,7 @@ static void show_lstack_stats(struct gazelle_stack_dfx_data *lstack_stat) + printf("call_msg: %-19"PRIu64" ", lstack_stat->data.pkts.call_msg_cnt); + printf("call_alloc_fail: %-12"PRIu64" ", lstack_stat->data.pkts.call_alloc_fail); + printf("call_null: %-18"PRIu64" \n", lstack_stat->data.pkts.stack_stat.call_null); +- printf("send_self_rpc: %-14"PRIu64" \n", lstack_stat->data.pkts.stack_stat.send_self_rpc); ++ printf("send_pkts_fail: %-13"PRIu64" \n", lstack_stat->data.pkts.stack_stat.send_pkts_fail); + } + + static void gazelle_print_lstack_stat_detail(struct gazelle_stack_dfx_data *lstack_stat, +-- +2.23.0 + diff --git a/0167-expand-data-recv-buff.patch b/0167-expand-data-recv-buff.patch new file mode 100644 index 0000000..6dd7029 --- /dev/null +++ b/0167-expand-data-recv-buff.patch @@ -0,0 +1,357 @@ +From da54963163baf9213c8cd34da6ec3c533ab1ef9d Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Tue, 20 Dec 2022 15:42:33 +0800 +Subject: [PATCH 2/2] expand-data-recv-buff + +--- + src/common/dpdk_common.h | 2 +- + src/lstack/core/lstack_lwip.c | 202 ++++++++++++++++++++++++------- + src/lstack/include/lstack_dpdk.h | 2 + + 3 files changed, 164 insertions(+), 42 deletions(-) + +diff --git a/src/common/dpdk_common.h b/src/common/dpdk_common.h +index 63d651d..c93f506 100644 +--- a/src/common/dpdk_common.h ++++ b/src/common/dpdk_common.h +@@ -193,7 +193,7 @@ static __rte_always_inline uint32_t gazelle_ring_read(struct rte_ring *r, void * + + __rte_ring_dequeue_elems(r, prod, obj_table, sizeof(void *), n); + +- r->prod.head = prod + n; ++ __atomic_store_n(&r->prod.head, prod + n, __ATOMIC_RELEASE); + + return n; + } +diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c 
+index 0b9b684..32d21b6 100644 +--- a/src/lstack/core/lstack_lwip.c ++++ b/src/lstack/core/lstack_lwip.c +@@ -83,6 +83,11 @@ static void reset_sock_data(struct lwip_sock *sock) + sock->send_lastdata = NULL; + } + ++ if (sock->lwip_lastdata) { ++ free_list_pbuf(sock->lwip_lastdata); ++ sock->lwip_lastdata = NULL; ++ } ++ + if (sock->send_pre_del) { + pbuf_free(sock->send_pre_del); + sock->send_pre_del = NULL; +@@ -95,6 +100,7 @@ static void reset_sock_data(struct lwip_sock *sock) + sock->events = 0; + sock->in_send = 0; + sock->remain_len = 0; ++ sock->read_wait = false; + + if (sock->recv_lastdata) { + pbuf_free(sock->recv_lastdata); +@@ -185,7 +191,6 @@ void gazelle_init_sock(int32_t fd) + init_list_node_null(&sock->recv_list); + init_list_node_null(&sock->event_list); + init_list_node_null(&sock->send_list); +- pthread_spin_init(&sock->sock_lock, PTHREAD_PROCESS_PRIVATE); + } + + void gazelle_clean_sock(int32_t fd) +@@ -207,7 +212,6 @@ void gazelle_clean_sock(int32_t fd) + + list_del_node_null(&sock->recv_list); + list_del_node_null(&sock->send_list); +- pthread_spin_destroy(&sock->sock_lock); + } + + void gazelle_free_pbuf(struct pbuf *pbuf) +@@ -636,64 +640,166 @@ static inline void free_recv_ring_readover(struct rte_ring *ring) + } + } + +-ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags) ++static inline struct pbuf *gazelle_ring_enqueuelast(struct rte_ring *r) + { +- if (sock->conn->recvmbox == NULL) { +- return 0; ++ struct pbuf *last_pbuf = NULL; ++ volatile uint32_t head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE); ++ uint32_t last = r->cons.head - 1; ++ if (last == head || last - head > r->capacity) { ++ return NULL; + } + +- free_recv_ring_readover(sock->recv_ring); ++ __rte_ring_dequeue_elems(r, last, (void **)&last_pbuf, sizeof(void *), 1); ++ __atomic_store_n(&last_pbuf->in_write, 1, __ATOMIC_RELEASE); + +- uint32_t free_count = gazelle_ring_free_count(sock->recv_ring); +- if (free_count == 0) { +- GAZELLE_RETURN(EAGAIN); ++ rte_mb(); ++ ++ head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE); ++ if (last == head || last - head > r->capacity) { ++ __atomic_store_n(&last_pbuf->in_write, 0, __ATOMIC_RELEASE); ++ return NULL; + } + +- uint32_t data_count = rte_ring_count(sock->conn->recvmbox->ring); +- uint32_t read_num = LWIP_MIN(free_count, data_count); +- read_num = LWIP_MIN(read_num, SOCK_RECV_RING_SIZE); +- struct pbuf *pbufs[SOCK_RECV_RING_SIZE]; +- uint32_t read_count = 0; +- ssize_t recv_len = 0; ++ return last_pbuf; ++} ++ ++static inline struct pbuf *pbuf_last(struct pbuf *pbuf) ++{ ++ while (pbuf->next) { ++ pbuf = pbuf->next; ++ } ++ return pbuf; ++} ++ ++static struct pbuf *merge_pbufs(struct pbuf *pbufs[], uint32_t data_count, uint32_t data_len) ++{ ++ struct pbuf *pre_last = (pbufs[0]->last) ? 
pbufs[0]->last : pbuf_last(pbufs[0]); ++ ++ if (data_count <= 1) { ++ pbufs[0]->last = pre_last; ++ return pbufs[0]; ++ } ++ ++ for (uint32_t i = 1; i < data_count; i++) { ++ pre_last->next = pbufs[i]; ++ pre_last = pbuf_last(pbufs[i]); ++ } ++ ++ pbufs[0]->tot_len = data_len; ++ pbufs[0]->last = pre_last; ++ ++ return pbufs[0]; ++} + +- for (uint32_t i = 0; i < read_num; i++) { ++static int32_t get_lwip_pbufs(struct lwip_sock *sock, struct pbuf *pbufs[], uint32_t *data_count, u8_t apiflags) ++{ ++ uint32_t data_len = 0; ++ ++ for (uint32_t i = 0; i < *data_count; i++) { + err_t err = netconn_recv_tcp_pbuf_flags(sock->conn, &pbufs[i], apiflags); + if (err != ERR_OK) { +- if (recv_len > 0) { ++ *data_count = i; ++ if (data_len > 0) { + /* already received data, return that (this trusts in getting the same error from + netconn layer again next time netconn_recv is called) */ + break; + } +- +- return (err == ERR_CLSD) ? 0 : -1; ++ return (err == ERR_CLSD) ? -1 : 0; + } + +- recv_len += pbufs[i]->tot_len; +- read_count++; ++ pbufs[i]->last = NULL; ++ pbufs[i]->in_write = 0; ++ data_len += pbufs[i]->tot_len; + + /* once we have some data to return, only add more if we don't need to wait */ + apiflags |= NETCONN_DONTBLOCK | NETCONN_NOFIN; + } + +- if (!(flags & MSG_PEEK)) { +- uint32_t enqueue_num = gazelle_ring_sp_enqueue(sock->recv_ring, (void **)pbufs, read_count); +- for (uint32_t i = enqueue_num; i < read_count; i++) { +- /* update receive window */ +- tcp_recved(sock->conn->pcb.tcp, pbufs[i]->tot_len); +- pbuf_free(pbufs[i]); +- sock->stack->stats.read_lwip_drop++; +- } ++ return (int32_t)data_len; ++} ++ ++static void put_pbufs_into_recv_ring(struct lwip_sock *sock, struct pbuf *pbufs[], ++ uint32_t data_count, uint32_t data_len) ++{ ++ uint32_t free_count = gazelle_ring_free_count(sock->recv_ring); ++ ++ if (data_count <= free_count) { ++ (void)gazelle_ring_sp_enqueue(sock->recv_ring, (void **)pbufs, data_count); ++ return; + } + +- for (uint32_t i = 0; get_protocol_stack_group()->latency_start && i < read_count; i++) { +- calculate_lstack_latency(&sock->stack->latency, pbufs[i], GAZELLE_LATENCY_LWIP); ++ struct pbuf *new_pbuf = merge_pbufs(pbufs, data_count, data_len); ++ ++ if (free_count) { ++ (void)gazelle_ring_sp_enqueue(sock->recv_ring, (void **)&new_pbuf, 1); ++ return; ++ } ++ ++ struct pbuf *last_pbuf = gazelle_ring_enqueuelast(sock->recv_ring); ++ if (last_pbuf == NULL) { ++ sock->lwip_lastdata = new_pbuf; ++ return; + } + +- sock->stack->stats.read_lwip_cnt += read_count; +- if (recv_len == 0) { ++ if (last_pbuf->last == NULL) { ++ last_pbuf->last = pbuf_last(last_pbuf); ++ } ++ last_pbuf->last->next = new_pbuf; ++ last_pbuf->tot_len += new_pbuf->tot_len; ++ last_pbuf->last = new_pbuf->last; ++ gazelle_ring_lastover(last_pbuf); ++ ++ if (last_pbuf->tot_len > SOCK_READ_MAXLEN) { ++ sock->read_wait = true; ++ } ++} ++ ++ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags) ++{ ++ if (sock->conn->recvmbox == NULL) { ++ return 0; ++ } ++ ++ free_recv_ring_readover(sock->recv_ring); ++ ++ if (sock->read_wait) { + GAZELLE_RETURN(EAGAIN); + } +- return recv_len; ++ ++ struct pbuf *pbufs[SOCK_RECV_RING_SIZE]; ++ uint32_t data_count = rte_ring_count(sock->conn->recvmbox->ring); ++ int32_t data_len = get_lwip_pbufs(sock, pbufs, &data_count, apiflags); ++ if (unlikely(data_len < 0)) { ++ /* ERR_CLSD */ ++ return 0; ++ } else if (unlikely(data_len == 0) && !sock->lwip_lastdata) { ++ GAZELLE_RETURN(EAGAIN); ++ } ++ ++ if (get_protocol_stack_group()->latency_start) { 
++ for (uint32_t i = 0; i < data_count; i++) { ++ calculate_lstack_latency(&sock->stack->latency, pbufs[i], GAZELLE_LATENCY_LWIP); ++ } ++ } ++ ++ if (data_count) { ++ uint32_t last_len = 0; ++ if (sock->lwip_lastdata) { ++ last_len = sock->lwip_lastdata->tot_len; ++ sock->lwip_lastdata->last->next = pbufs[0]; ++ sock->lwip_lastdata->tot_len += pbufs[0]->tot_len; ++ sock->lwip_lastdata->last = pbuf_last(pbufs[0]); ++ pbufs[0] = sock->lwip_lastdata; ++ sock->lwip_lastdata = NULL; ++ } ++ put_pbufs_into_recv_ring(sock, pbufs, data_count, data_len + last_len); ++ } else { ++ put_pbufs_into_recv_ring(sock, &sock->lwip_lastdata, 1, sock->lwip_lastdata->tot_len); ++ sock->lwip_lastdata = NULL; ++ } ++ sock->stack->stats.read_lwip_cnt += data_count; ++ ++ return data_len; + } + + static int32_t check_msg_vaild(const struct msghdr *message) +@@ -725,9 +831,9 @@ ssize_t recvmsg_from_stack(int32_t s, struct msghdr *message, int32_t flags) + } + + for (int32_t i = 0; i < message->msg_iovlen; i++) { +- if (message->msg_iov[i].iov_len == 0){ +- continue; +- } ++ if (message->msg_iov[i].iov_len == 0) { ++ continue; ++ } + + ssize_t recvd_local = read_stack_data(s, message->msg_iov[i].iov_base, message->msg_iov[i].iov_len, flags); + if (recvd_local > 0) { +@@ -828,7 +934,7 @@ static inline void del_data_in_event(struct lwip_sock *sock) + + static struct pbuf *pbuf_free_partial(struct pbuf *pbuf, uint16_t free_len) + { +- uint16_t tot_len = pbuf->tot_len - free_len; ++ uint32_t tot_len = pbuf->tot_len - free_len; + + while (free_len && pbuf) { + if (free_len >= pbuf->len) { +@@ -840,7 +946,9 @@ static struct pbuf *pbuf_free_partial(struct pbuf *pbuf, uint16_t free_len) + } + } + +- pbuf->tot_len = tot_len; ++ if (pbuf) { ++ pbuf->tot_len = tot_len; ++ } + return pbuf; + } + +@@ -849,7 +957,7 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags) + size_t recv_left = len; + struct pbuf *pbuf = NULL; + ssize_t recvd = 0; +- uint16_t copy_len; ++ uint32_t copy_len; + struct lwip_sock *sock = get_socket_by_fd(fd); + bool latency_enable = get_protocol_stack_group()->latency_start; + +@@ -870,8 +978,15 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags) + break; + } + } ++ if (__atomic_load_n(&pbuf->in_write, __ATOMIC_ACQUIRE)) { ++ sock->recv_lastdata = pbuf; ++ break; ++ } + +- copy_len = (recv_left > pbuf->tot_len) ? pbuf->tot_len : (uint16_t)recv_left; ++ copy_len = (recv_left > pbuf->tot_len) ? 
pbuf->tot_len : recv_left; ++ if (copy_len > UINT16_MAX) { ++ copy_len = UINT16_MAX; ++ } + pbuf_copy_partial(pbuf, (char *)buf + recvd, copy_len, 0); + + recvd += copy_len; +@@ -879,6 +994,7 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags) + + if (pbuf->tot_len > copy_len) { + sock->recv_lastdata = pbuf_free_partial(pbuf, copy_len); ++ break; + } else { + if (sock->wakeup) { + sock->wakeup->stat.app_read_cnt += 1; +@@ -890,6 +1006,10 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags) + } + } + ++ if (sock->read_wait) { ++ sock->read_wait = false; ++ } ++ + /* rte_ring_count reduce lock */ + if (sock->wakeup && sock->wakeup->type == WAKEUP_EPOLL && (sock->events & EPOLLIN)) { + del_data_in_event(sock); +diff --git a/src/lstack/include/lstack_dpdk.h b/src/lstack/include/lstack_dpdk.h +index c3bc527..ac068b8 100644 +--- a/src/lstack/include/lstack_dpdk.h ++++ b/src/lstack/include/lstack_dpdk.h +@@ -24,6 +24,8 @@ + + #define MAX_PACKET_SZ 2048 + ++#define SOCK_READ_MAXLEN 0x200000 ++ + #define RING_SIZE(x) ((x) - 1) + + #define MBUF_SZ (MAX_PACKET_SZ + RTE_PKTMBUF_HEADROOM) +-- +2.23.0 + diff --git a/gazelle.spec b/gazelle.spec index ff13ff8..b75dddf 100644 --- a/gazelle.spec +++ b/gazelle.spec @@ -2,7 +2,7 @@ Name: gazelle Version: 1.0.1 -Release: 37 +Release: 38 Summary: gazelle is a high performance user-mode stack License: MulanPSL-2.0 URL: https://gitee.com/openeuler/gazelle @@ -180,6 +180,8 @@ Patch9162: 0162-remove-mbuf-reserve-in-mbuf-alloc.patch Patch9163: 0163-pkts-bulk-send-to-nic.patch Patch9164: 0164-rpc-dont-send.patch Patch9165: 0165-recv-pbuf-free-timely.patch +Patch9166: 0166-optimite-send-pkts-dul-index.patch +Patch9167: 0167-expand-data-recv-buff.patch %description %{name} is a high performance user-mode stack. @@ -220,6 +222,9 @@ install -Dpm 0640 %{_builddir}/%{name}-%{version}/src/ltran/ltran.conf %{b %config(noreplace) %{conf_path}/ltran.conf %changelog +* Tue Dec 20 2022 wuchangsheng - 1.0.1-38 +- optimite recv data buff and send pkts index + * Sun Dec 18 2022 wuchangsheng - 1.0.1-37 - pkts-bulk-send-to-nic and rpc-dont-send
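
Note on the send-path change in 0166-optimite-send-pkts-dul-index.patch: stack_send_pkts() drops the send_cnt counter and the copy-down of unsent mbufs in favour of two free-running indices, send_start and send_end, over a power-of-two array (STACK_SEND_MAX / STACK_SEND_MASK); occupancy is simply send_end - send_start, and unsigned wrap-around is harmless. The following is a minimal, self-contained sketch of that indexing scheme only, not the gazelle code itself; the pkt_ring type, ring_put(), ring_flush() and the xmit callback are hypothetical stand-ins for stack->send_pkts[] and stack->dev_ops.tx_xmit().

/*
 * Minimal sketch of free-running head/tail indices over a power-of-two
 * array -- an illustration, not the gazelle implementation.
 */
#include <stdint.h>

#define RING_MAX  2048u                  /* must be a power of two */
#define RING_MASK (RING_MAX - 1u)

struct pkt_ring {
    uint32_t start;                      /* consume index, only ever increases */
    uint32_t end;                        /* produce index, only ever increases */
    void *pkts[RING_MAX];
};

/* queued entries; correct even when the 32-bit indices wrap around */
static inline uint32_t ring_count(const struct pkt_ring *r)
{
    return r->end - r->start;
}

static inline int ring_put(struct pkt_ring *r, void *pkt)
{
    if (ring_count(r) >= RING_MAX) {
        return -1;                       /* full: caller flushes and retries */
    }
    r->pkts[r->end & RING_MASK] = pkt;
    r->end++;
    return 0;
}

/* burst-transmit callback, e.g. a wrapper around rte_eth_tx_burst() */
typedef uint32_t (*xmit_cb)(void **pkts, uint32_t n);

static void ring_flush(struct pkt_ring *r, xmit_cb xmit)
{
    uint32_t num = ring_count(r);
    if (num == 0) {
        return;
    }

    uint32_t start = r->start & RING_MASK;
    uint32_t first = RING_MAX - start;   /* contiguous room up to the array end */
    if (first > num) {
        first = num;
    }

    uint32_t sent = xmit(&r->pkts[start], first);
    if (sent == first && num > first) {
        sent += xmit(&r->pkts[0], num - first);  /* wrapped tail segment */
    }

    r->start += sent;                    /* anything unsent stays queued */
}

The usage pattern mirrors what the patch does in add_send_pkt(): try ring_put(), and when the ring reports full, flush and retry instead of transmitting the single packet inline.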
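
Note on the receive-path change in 0167-expand-data-recv-buff.patch: when sock->recv_ring has no free slot, the pbufs just taken from lwIP are merged into one chain whose head carries the combined tot_len and a cached pointer to the chain tail (pbuf->last), so merge_pbufs(), put_pbufs_into_recv_ring() and the lwip_lastdata hand-over never re-walk the whole chain. The sketch below shows only that cached-tail chaining idea under assumed types; struct buf, buf_tail() and buf_chain_append() are illustrative stand-ins, not lwIP's struct pbuf API.

/*
 * Sketch of O(1) chain append with a cached tail pointer; assumed types,
 * not lwIP's struct pbuf.
 */
#include <stddef.h>
#include <stdint.h>

struct buf {
    struct buf *next;        /* next segment in the chain */
    struct buf *last;        /* cached chain tail, maintained on the head only */
    uint32_t len;            /* payload bytes in this segment */
    uint32_t tot_len;        /* payload bytes in this segment and all later ones */
};

/* walk to the physical tail; only needed when no cached tail exists yet */
static struct buf *buf_tail(struct buf *b)
{
    while (b->next != NULL) {
        b = b->next;
    }
    return b;
}

/* append one chain to another without re-walking the head chain */
static void buf_chain_append(struct buf *head, struct buf *tail_chain)
{
    struct buf *old_tail = (head->last != NULL) ? head->last : buf_tail(head);

    old_tail->next = tail_chain;
    head->tot_len += tail_chain->tot_len;
    head->last = (tail_chain->last != NULL) ? tail_chain->last : buf_tail(tail_chain);
}

The SOCK_READ_MAXLEN / read_wait pair in the patch then bounds how large such a merged chain may grow before lwIP stops feeding it until the application drains the data.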
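
Note on the memory-ordering tweak in the same patch: gazelle_ring_read() now publishes r->prod.head with __atomic_store_n(..., __ATOMIC_RELEASE) instead of a plain store, pairing with the __ATOMIC_ACQUIRE loads added in gazelle_ring_enqueuelast(). A minimal sketch of that release/acquire pairing follows; the idx_pair type and both helpers are hypothetical stand-ins for the ring's prod/cons bookkeeping.

/*
 * Sketch of release/acquire publication with GCC __atomic builtins;
 * hypothetical names, not the rte_ring layout.
 */
#include <stdint.h>

struct idx_pair {
    volatile uint32_t head;  /* index published from one thread to another */
    void *slot[8];
};

/* the side that advances the index: finish all slot[] accesses, then publish */
static inline void publish_head(struct idx_pair *p, uint32_t new_head)
{
    __atomic_store_n(&p->head, new_head, __ATOMIC_RELEASE);
}

/*
 * the peer: the acquire load pairs with the release store above, so slot[]
 * contents written before the publish are guaranteed visible afterwards
 */
static inline uint32_t observe_head(const struct idx_pair *p)
{
    return __atomic_load_n(&p->head, __ATOMIC_ACQUIRE);
}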