reduce epoll wakeup

(cherry picked from commit 50942cd445db019fbe1cbc23f5d4e100bf0beea2)
This commit is contained in:
wu-changsheng 2022-12-28 18:43:35 +08:00 committed by openeuler-sync-bot
parent b7ace2d34c
commit 98bd02dbe0
4 changed files with 452 additions and 1 deletions

View File

@ -0,0 +1,141 @@
From 2f907cc7c537ed857c23fb183bb5d5751c1e4d3a Mon Sep 17 00:00:00 2001
From: jiangheng <jiangheng14@huawei.com>
Date: Wed, 28 Dec 2022 16:50:48 +0800
Subject: [PATCH 1/3] pbuf-align-cache-line
---
src/common/dpdk_common.h | 31 +++++++++++++++++------------
src/common/gazelle_opt.h | 2 --
src/lstack/core/lstack_dpdk.c | 3 +--
src/lstack/core/lstack_stack_stat.c | 2 +-
src/ltran/ltran_ethdev.c | 6 ++++--
5 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/src/common/dpdk_common.h b/src/common/dpdk_common.h
index 08ce4a1..49e03a7 100644
--- a/src/common/dpdk_common.h
+++ b/src/common/dpdk_common.h
@@ -21,22 +21,27 @@
#define GAZELLE_KNI_NAME "kni" // will be removed during dpdk update
-/* time_stamp time_stamp_vaid_check . align 8 */
-#define PTR_TO_PRIVATE(mbuf) RTE_PTR_ADD(mbuf, sizeof(struct rte_mbuf))
-
/* Layout:
- * | rte_mbuf | gazelle_prive | pbuf_custom | tcp_seg | payload |
- * | 128 | 16 | 64 | 32 |
+ * | rte_mbuf | pbuf_custom| tcp_seg | gazelle_prive | payload |
+ * | 128 | 64 | 32 | 16 |
* rte_prefetch0 in lwip project,tcp_out.c,tcp_output_segment use constants
+ * cacheline is 64, make sure pbuf_custom in same cacheline
**/
struct pbuf;
-static inline struct rte_mbuf *pbuf_to_mbuf(struct pbuf *p)
+#define LATENCY_TIMESTAMP_SIZE (sizeof(uint64_t) * 2)
+#define MBUF_PRIVATE_SIZE 128
+#define LATENCY_OFFSET 96
+static __rte_always_inline uint64_t *mbuf_to_private(struct rte_mbuf *mbuf)
+{
+ return (uint64_t *)((uint8_t *)(mbuf) + sizeof(struct rte_mbuf) + LATENCY_OFFSET); // in priv area, after mbuf
+}
+static __rte_always_inline struct rte_mbuf *pbuf_to_mbuf(struct pbuf *p)
{
- return ((struct rte_mbuf *)(void *)((uint8_t *)(p) - sizeof(struct rte_mbuf) - GAZELLE_MBUFF_PRIV_SIZE));
+ return ((struct rte_mbuf *)(void *)((uint8_t *)(p) - sizeof(struct rte_mbuf)));
}
-static inline struct pbuf_custom *mbuf_to_pbuf(struct rte_mbuf *m)
+static __rte_always_inline struct pbuf_custom *mbuf_to_pbuf(struct rte_mbuf *m)
{
- return ((struct pbuf_custom *)((uint8_t *)(m) + sizeof(struct rte_mbuf) + GAZELLE_MBUFF_PRIV_SIZE));
+ return ((struct pbuf_custom *)((uint8_t *)(m) + sizeof(struct rte_mbuf)));
}
/* NOTE!!! magic code, even the order.
@@ -62,15 +67,15 @@ static __rte_always_inline void copy_mbuf(struct rte_mbuf *dst, struct rte_mbuf
rte_memcpy(dst_data, src_data, data_len);
// copy private date.
- dst_data = (uint8_t *)PTR_TO_PRIVATE(dst);
- src_data = (uint8_t *)PTR_TO_PRIVATE(src);
- rte_memcpy(dst_data, src_data, GAZELLE_MBUFF_PRIV_SIZE);
+ dst_data = (uint8_t *)mbuf_to_private(dst);
+ src_data = (uint8_t *)mbuf_to_private(src);
+ rte_memcpy(dst_data, src_data, LATENCY_TIMESTAMP_SIZE);
}
static __rte_always_inline void time_stamp_into_mbuf(uint32_t rx_count, struct rte_mbuf *buf[], uint64_t time_stamp)
{
for (uint32_t i = 0; i < rx_count; i++) {
- uint64_t *priv = (uint64_t *)PTR_TO_PRIVATE(buf[i]);
+ uint64_t *priv = mbuf_to_private(buf[i]);
*priv = time_stamp; // time stamp
*(priv + 1) = ~(*priv); // just for later vaid check
}
diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h
index 94f274d..012997c 100644
--- a/src/common/gazelle_opt.h
+++ b/src/common/gazelle_opt.h
@@ -28,8 +28,6 @@
#define ETHER_ADDR_LEN 6
-#define GAZELLE_MBUFF_PRIV_SIZE (sizeof(uint64_t) * 2)
-
#define DEFAULT_RING_SIZE (512)
#define DEFAULT_RING_MASK (511)
#define DEFAULT_BACKUP_RING_SIZE_FACTOR (16)
diff --git a/src/lstack/core/lstack_dpdk.c b/src/lstack/core/lstack_dpdk.c
index de87d48..560162f 100644
--- a/src/lstack/core/lstack_dpdk.c
+++ b/src/lstack/core/lstack_dpdk.c
@@ -144,8 +144,7 @@ static struct rte_mempool *create_pktmbuf_mempool(const char *name, uint32_t nb_
}
/* time stamp before pbuf_custom as priv_data */
- uint16_t private_size = sizeof(struct tcp_seg) + sizeof(struct pbuf_custom) + GAZELLE_MBUFF_PRIV_SIZE;
- private_size = RTE_ALIGN(private_size, RTE_CACHE_LINE_SIZE);
+ uint16_t private_size = RTE_ALIGN(MBUF_PRIVATE_SIZE, RTE_CACHE_LINE_SIZE);
pool = rte_pktmbuf_pool_create(pool_name, nb_mbuf, mbuf_cache_size, private_size, MBUF_SZ, rte_socket_id());
if (pool == NULL) {
LSTACK_LOG(ERR, LSTACK, "cannot create %s pool rte_err=%d\n", pool_name, rte_errno);
diff --git a/src/lstack/core/lstack_stack_stat.c b/src/lstack/core/lstack_stack_stat.c
index 59c8e66..7243e82 100644
--- a/src/lstack/core/lstack_stack_stat.c
+++ b/src/lstack/core/lstack_stack_stat.c
@@ -50,7 +50,7 @@ uint64_t get_current_time(void)
void calculate_lstack_latency(struct gazelle_stack_latency *stack_latency, const struct pbuf *pbuf,
enum GAZELLE_LATENCY_TYPE type)
{
- const uint64_t *priv = (uint64_t *)((uint8_t *)(pbuf) - GAZELLE_MBUFF_PRIV_SIZE);
+ const uint64_t *priv = (uint64_t *)((uint8_t *)(pbuf) + LATENCY_OFFSET);
if (*priv != ~(*(priv + 1)) || *priv < stack_latency->start_time) {
return;
}
diff --git a/src/ltran/ltran_ethdev.c b/src/ltran/ltran_ethdev.c
index 62a662d..e0c824a 100644
--- a/src/ltran/ltran_ethdev.c
+++ b/src/ltran/ltran_ethdev.c
@@ -147,7 +147,8 @@ static struct rte_mempool *ltran_create_rx_mbuf_pool(uint32_t bond_port_index)
return NULL;
}
- return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, GAZELLE_MBUFF_PRIV_SIZE,
+ uint16_t private_size = RTE_ALIGN(MBUF_PRIVATE_SIZE, RTE_CACHE_LINE_SIZE);
+ return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, private_size,
RTE_MBUF_DEFAULT_BUF_SIZE, (int32_t)rte_socket_id());
}
@@ -165,7 +166,8 @@ static struct rte_mempool *ltran_create_tx_mbuf_pool(uint32_t bond_port_index)
return NULL;
}
- return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, GAZELLE_MBUFF_PRIV_SIZE,
+ uint16_t private_size = RTE_ALIGN(MBUF_PRIVATE_SIZE, RTE_CACHE_LINE_SIZE);
+ return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, private_size,
RTE_MBUF_DEFAULT_BUF_SIZE, (int32_t)rte_socket_id());
}
--
2.23.0

View File

@ -0,0 +1,155 @@
From c6d8e28b70d6ac86c1f8df6b94c179be4f8109f0 Mon Sep 17 00:00:00 2001
From: jiangheng <jiangheng14@huawei.com>
Date: Wed, 28 Dec 2022 17:05:57 +0800
Subject: [PATCH 2/3] support-set-main-thread-affinity
---
src/lstack/core/lstack_cfg.c | 18 ++++++++++++++++++
src/lstack/core/lstack_init.c | 18 +++++++++++-------
src/lstack/core/lstack_protocol_stack.c | 4 +---
src/lstack/include/lstack_cfg.h | 1 +
src/lstack/lstack.conf | 5 +++++
5 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/src/lstack/core/lstack_cfg.c b/src/lstack/core/lstack_cfg.c
index 55d16f9..4912fdd 100644
--- a/src/lstack/core/lstack_cfg.c
+++ b/src/lstack/core/lstack_cfg.c
@@ -56,6 +56,7 @@ static int32_t parse_gateway_addr(void);
static int32_t parse_kni_switch(void);
static int32_t parse_listen_shadow(void);
static int32_t parse_app_bind_numa(void);
+static int32_t parse_main_thread_affinity(void);
static int32_t parse_unix_prefix(void);
static int32_t parse_rxtx_pool_size(void);
static int32_t parse_send_connect_number(void);
@@ -81,6 +82,7 @@ static struct config_vector_t g_config_tbl[] = {
{ "kni_switch", parse_kni_switch },
{ "listen_shadow", parse_listen_shadow },
{ "app_bind_numa", parse_app_bind_numa },
+ { "main_thread_affinity", parse_main_thread_affinity },
{ "unix_prefix", parse_unix_prefix },
{ "mbuf_pool_size", parse_rxtx_pool_size },
{ "send_connect_number", parse_send_connect_number },
@@ -847,6 +849,22 @@ static int32_t parse_app_bind_numa(void)
return 0;
}
+static int32_t parse_main_thread_affinity(void)
+{
+ const config_setting_t *arg = NULL;
+
+ arg = config_lookup(&g_config, "main_thread_affinity");
+ if (arg == NULL) {
+ g_config_params.main_thread_affinity = false;
+ return 0;
+ }
+
+ int32_t val = config_setting_get_int(arg);
+ g_config_params.main_thread_affinity = (val == 0) ? false : true;
+
+ return 0;
+}
+
static int32_t parse_kni_switch(void)
{
const config_setting_t *arg = NULL;
diff --git a/src/lstack/core/lstack_init.c b/src/lstack/core/lstack_init.c
index f647b8e..6347ab1 100644
--- a/src/lstack/core/lstack_init.c
+++ b/src/lstack/core/lstack_init.c
@@ -223,18 +223,18 @@ static void create_control_thread(void)
pthread_t tid;
if (use_ltran()) {
+ ret = pthread_create(&tid, NULL, (void *(*)(void *))control_client_thread, NULL);
dpdk_skip_nic_init();
if (control_init_client(false) != 0) {
LSTACK_EXIT(1, "control_init_client failed\n");
}
- ret = pthread_create(&tid, NULL, (void *(*)(void *))control_client_thread, NULL);
} else {
+ ret = pthread_create(&tid, NULL, (void *(*)(void *))control_server_thread, NULL);
ret = dpdk_eal_init();
if (ret < 0) {
LSTACK_EXIT(1, "dpdk_eal_init failed ret=%d errno=%d\n", ret, errno);
}
- ret = pthread_create(&tid, NULL, (void *(*)(void *))control_server_thread, NULL);
}
if (ret != 0) {
LSTACK_EXIT(1, "pthread_create failed ret=%d errno=%d\n", ret, errno);
@@ -295,9 +295,11 @@ __attribute__((constructor)) void gazelle_network_init(void)
/*
* save initial affinity */
- if (thread_affinity_default() < 0) {
- LSTACK_PRE_LOG(LSTACK_ERR, "pthread_getaffinity_np failed\n");
- LSTACK_EXIT(1, "pthread_getaffinity_np failed\n");
+ if (!get_global_cfg_params()->main_thread_affinity) {
+ if (thread_affinity_default() < 0) {
+ LSTACK_PRE_LOG(LSTACK_ERR, "pthread_getaffinity_np failed\n");
+ LSTACK_EXIT(1, "pthread_getaffinity_np failed\n");
+ }
}
gazelle_signal_init();
@@ -309,8 +311,10 @@ __attribute__((constructor)) void gazelle_network_init(void)
/*
* cancel the core binding from DPDK initialization */
- if (thread_affinity_default() < 0) {
- LSTACK_EXIT(1, "pthread_setaffinity_np failed\n");
+ if (!get_global_cfg_params()->main_thread_affinity) {
+ if (thread_affinity_default() < 0) {
+ LSTACK_EXIT(1, "pthread_setaffinity_np failed\n");
+ }
}
lstack_log_level_init();
diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c
index 9cc8946..ad9d026 100644
--- a/src/lstack/core/lstack_protocol_stack.c
+++ b/src/lstack/core/lstack_protocol_stack.c
@@ -255,9 +255,7 @@ static void* gazelle_kernelevent_thread(void *arg)
uint16_t queue_id = *(uint16_t *)arg;
struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id];
- if (get_global_cfg_params()->app_bind_numa) {
- bind_to_stack_numa(stack);
- }
+ bind_to_stack_numa(stack);
LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id);
diff --git a/src/lstack/include/lstack_cfg.h b/src/lstack/include/lstack_cfg.h
index e33a484..bdaa083 100644
--- a/src/lstack/include/lstack_cfg.h
+++ b/src/lstack/include/lstack_cfg.h
@@ -83,6 +83,7 @@ struct cfg_params {
bool kni_switch;
bool listen_shadow; // true:listen in all stack thread. false:listen in one stack thread.
bool app_bind_numa;
+ bool main_thread_affinity;
int dpdk_argc;
char **dpdk_argv;
struct secondary_attach_arg sec_attach_arg;
diff --git a/src/lstack/lstack.conf b/src/lstack/lstack.conf
index 47140ad..c27db22 100644
--- a/src/lstack/lstack.conf
+++ b/src/lstack/lstack.conf
@@ -34,6 +34,11 @@ num_cpus="2"
#each cpu core start a wakeup thread.
#num_wakeup="3"
+#app worker thread bind to numa in epoll/poll.
+app_bind_numa=1
+#app main thread affinity set by dpdk.
+main_thread_affinity=0
+
host_addr="192.168.1.10"
mask_addr="255.255.255.0"
gateway_addr="192.168.1.1"
--
2.23.0

View File

@ -0,0 +1,147 @@
From 99f46a3e20d44ec8736becee896ed519971aab52 Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Wed, 28 Dec 2022 18:26:14 +0800
Subject: [PATCH 3/3] reduce epoll wakeup
---
src/lstack/api/lstack_epoll.c | 28 +++++++++++--------------
src/lstack/core/lstack_lwip.c | 1 +
src/lstack/core/lstack_protocol_stack.c | 8 +++----
3 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/src/lstack/api/lstack_epoll.c b/src/lstack/api/lstack_epoll.c
index 605984f..7860163 100644
--- a/src/lstack/api/lstack_epoll.c
+++ b/src/lstack/api/lstack_epoll.c
@@ -186,7 +186,7 @@ int32_t lstack_do_epoll_create(int32_t fd)
GAZELLE_RETURN(EINVAL);
}
pthread_mutex_trylock(&wakeup->wait);
- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE);
+ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE);
struct protocol_stack_group *stack_group = get_protocol_stack_group();
init_list_node_null(&wakeup->poll_list);
@@ -473,6 +473,7 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe
}
do {
+ __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE);
lwip_num = epoll_lwip_event(wakeup, events, maxevents);
wakeup->stat.app_events += lwip_num;
@@ -484,11 +485,11 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe
}
if (lwip_num + kernel_num > 0) {
- return lwip_num + kernel_num;
+ break;
}
if (timeout == 0) {
- return 0;
+ break;
}
if (timeout < 0) {
@@ -498,13 +499,10 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe
ms_to_timespec(&epoll_time, timeout);
ret = pthread_mutex_timedlock(&wakeup->wait, &epoll_time);
}
-
- if (ret == 0) {
- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE);
- }
} while (ret == 0);
- return 0;
+ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE);
+ return lwip_num + kernel_num;
}
static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup)
@@ -513,7 +511,7 @@ static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup)
GAZELLE_RETURN(EINVAL);
}
pthread_mutex_trylock(&wakeup->wait);
- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE);
+ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE);
for (uint32_t i = 0; i < PROTOCOL_STACK_MAX; i++) {
init_list_node_null(&wakeup->wakeup_list[i]);
@@ -680,6 +678,7 @@ int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout)
int32_t ret;
do {
+ __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE);
lwip_num = poll_lwip_event(fds, nfds);
if (__atomic_load_n(&wakeup->have_kernel_event, __ATOMIC_ACQUIRE)) {
@@ -694,11 +693,11 @@ int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout)
}
if (lwip_num + kernel_num > 0) {
- return lwip_num + kernel_num;
+ break;
}
if (timeout == 0) {
- return 0;
+ break;
}
if (timeout < 0) {
@@ -708,11 +707,8 @@ int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout)
ms_to_timespec(&epoll_time, timeout);
ret = pthread_mutex_timedlock(&wakeup->wait, &epoll_time);
}
-
- if (ret == 0) {
- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE);
- }
} while (ret == 0);
- return 0;
+ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE);
+ return lwip_num + kernel_num;
}
diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c
index 31f87cf..01c7280 100644
--- a/src/lstack/core/lstack_lwip.c
+++ b/src/lstack/core/lstack_lwip.c
@@ -598,6 +598,7 @@ void stack_send(struct rpc_msg *msg)
}
__atomic_store_n(&sock->in_send, 0, __ATOMIC_RELEASE);
+ rte_mb();
/* have remain data or replenish again add sendlist */
if (sock->errevent == 0 && NETCONN_IS_DATAOUT(sock)) {
diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c
index ad9d026..93204d1 100644
--- a/src/lstack/core/lstack_protocol_stack.c
+++ b/src/lstack/core/lstack_protocol_stack.c
@@ -481,16 +481,16 @@ static void* gazelle_stack_thread(void *arg)
for (;;) {
poll_rpc_msg(stack, rpc_number);
+ send_stack_list(stack, send_connect_number);
+
+ stack_send_pkts(stack);
+
stack_free_recv_pkts(stack, nic_read_number);
gazelle_eth_dev_poll(stack, use_ltran_flag, nic_read_number);
read_recv_list(stack, read_connect_number);
- send_stack_list(stack, send_connect_number);
-
- stack_send_pkts(stack);
-
if ((wakeup_tick & 0xf) == 0) {
wakeup_kernel_event(stack);
wakeup_stack_epoll(stack, wakeup_thread_enable);
--
2.23.0

View File

@ -2,7 +2,7 @@
Name: gazelle
Version: 1.0.1
Release: 41
Release: 42
Summary: gazelle is a high performance user-mode stack
License: MulanPSL-2.0
URL: https://gitee.com/openeuler/gazelle
@ -190,6 +190,9 @@ Patch9172: 0172-fix-send-pkts-bluk-err.patch
Patch9173: 0173-free-recv-pkts-bluks.patch
Patch9174: 0174-fix-lstack-Makefile-warning.patch
Patch9175: 0175-fix-null-pointer-deref-in-stack_broadcast_close.patch
Patch9176: 0176-pbuf-align-cache-line.patch
Patch9177: 0177-support-set-main-thread-affinity.patch
Patch9178: 0178-reduce-epoll-wakeup.patch
%description
%{name} is a high performance user-mode stack.
@ -230,6 +233,11 @@ install -Dpm 0640 %{_builddir}/%{name}-%{version}/src/ltran/ltran.conf %{b
%config(noreplace) %{conf_path}/ltran.conf
%changelog
* Wed Dec 28 2022 wuchangsheng <wuchangsheng2@huawei.com> - 1.0.1-42
- pbuf cacheline align
- support main thread affinity
- reduce epoll wakeup
* Fri Dec 23 2022 kircher <majun65@huawei.com> - 1.0.1-41
- fix null pointer deref in stack_broadcast_close
- fix lstack Makefile warning