From 98bd02dbe0252fd28c8f2c9a3b522e84416ce28d Mon Sep 17 00:00:00 2001 From: wu-changsheng Date: Wed, 28 Dec 2022 18:43:35 +0800 Subject: [PATCH] reduce epoll wakeup (cherry picked from commit 50942cd445db019fbe1cbc23f5d4e100bf0beea2) --- 0176-pbuf-align-cache-line.patch | 141 ++++++++++++++++++ 0177-support-set-main-thread-affinity.patch | 155 ++++++++++++++++++++ 0178-reduce-epoll-wakeup.patch | 147 +++++++++++++++++++ gazelle.spec | 10 +- 4 files changed, 452 insertions(+), 1 deletion(-) create mode 100644 0176-pbuf-align-cache-line.patch create mode 100644 0177-support-set-main-thread-affinity.patch create mode 100644 0178-reduce-epoll-wakeup.patch diff --git a/0176-pbuf-align-cache-line.patch b/0176-pbuf-align-cache-line.patch new file mode 100644 index 0000000..fc55d36 --- /dev/null +++ b/0176-pbuf-align-cache-line.patch @@ -0,0 +1,141 @@ +From 2f907cc7c537ed857c23fb183bb5d5751c1e4d3a Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Wed, 28 Dec 2022 16:50:48 +0800 +Subject: [PATCH 1/3] pbuf-align-cache-line + +--- + src/common/dpdk_common.h | 31 +++++++++++++++++------------ + src/common/gazelle_opt.h | 2 -- + src/lstack/core/lstack_dpdk.c | 3 +-- + src/lstack/core/lstack_stack_stat.c | 2 +- + src/ltran/ltran_ethdev.c | 6 ++++-- + 5 files changed, 24 insertions(+), 20 deletions(-) + +diff --git a/src/common/dpdk_common.h b/src/common/dpdk_common.h +index 08ce4a1..49e03a7 100644 +--- a/src/common/dpdk_common.h ++++ b/src/common/dpdk_common.h +@@ -21,22 +21,27 @@ + + #define GAZELLE_KNI_NAME "kni" // will be removed during dpdk update + +-/* time_stamp time_stamp_vaid_check . 
align 8 */ +-#define PTR_TO_PRIVATE(mbuf) RTE_PTR_ADD(mbuf, sizeof(struct rte_mbuf)) +- + /* Layout: +- * | rte_mbuf | gazelle_prive | pbuf_custom | tcp_seg | payload | +- * | 128 | 16 | 64 | 32 | ++ * | rte_mbuf | pbuf_custom| tcp_seg | gazelle_prive | payload | ++ * | 128 | 64 | 32 | 16 | + * rte_prefetch0 in lwip project,tcp_out.c,tcp_output_segment use constants ++ * cacheline is 64, make sure pbuf_custom in same cacheline + **/ + struct pbuf; +-static inline struct rte_mbuf *pbuf_to_mbuf(struct pbuf *p) ++#define LATENCY_TIMESTAMP_SIZE (sizeof(uint64_t) * 2) ++#define MBUF_PRIVATE_SIZE 128 ++#define LATENCY_OFFSET 96 ++static __rte_always_inline uint64_t *mbuf_to_private(struct rte_mbuf *mbuf) ++{ ++ return (uint64_t *)((uint8_t *)(mbuf) + sizeof(struct rte_mbuf) + LATENCY_OFFSET); ++} ++static __rte_always_inline struct rte_mbuf *pbuf_to_mbuf(struct pbuf *p) + { +- return ((struct rte_mbuf *)(void *)((uint8_t *)(p) - sizeof(struct rte_mbuf) - GAZELLE_MBUFF_PRIV_SIZE)); ++ return ((struct rte_mbuf *)(void *)((uint8_t *)(p) - sizeof(struct rte_mbuf))); + } +-static inline struct pbuf_custom *mbuf_to_pbuf(struct rte_mbuf *m) ++static __rte_always_inline struct pbuf_custom *mbuf_to_pbuf(struct rte_mbuf *m) + { +- return ((struct pbuf_custom *)((uint8_t *)(m) + sizeof(struct rte_mbuf) + GAZELLE_MBUFF_PRIV_SIZE)); ++ return ((struct pbuf_custom *)((uint8_t *)(m) + sizeof(struct rte_mbuf))); + } + + /* NOTE!!! magic code, even the order. +@@ -62,15 +67,15 @@ static __rte_always_inline void copy_mbuf(struct rte_mbuf *dst, struct rte_mbuf + rte_memcpy(dst_data, src_data, data_len); + + // copy private date. 
+- dst_data = (uint8_t *)PTR_TO_PRIVATE(dst); +- src_data = (uint8_t *)PTR_TO_PRIVATE(src); +- rte_memcpy(dst_data, src_data, GAZELLE_MBUFF_PRIV_SIZE); ++ dst_data = (uint8_t *)mbuf_to_private(dst); ++ src_data = (uint8_t *)mbuf_to_private(src); ++ rte_memcpy(dst_data, src_data, LATENCY_TIMESTAMP_SIZE); + } + + static __rte_always_inline void time_stamp_into_mbuf(uint32_t rx_count, struct rte_mbuf *buf[], uint64_t time_stamp) + { + for (uint32_t i = 0; i < rx_count; i++) { +- uint64_t *priv = (uint64_t *)PTR_TO_PRIVATE(buf[i]); ++ uint64_t *priv = mbuf_to_private(buf[i]); + *priv = time_stamp; // time stamp + *(priv + 1) = ~(*priv); // just for later vaid check + } +diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h +index 94f274d..012997c 100644 +--- a/src/common/gazelle_opt.h ++++ b/src/common/gazelle_opt.h +@@ -28,8 +28,6 @@ + + #define ETHER_ADDR_LEN 6 + +-#define GAZELLE_MBUFF_PRIV_SIZE (sizeof(uint64_t) * 2) +- + #define DEFAULT_RING_SIZE (512) + #define DEFAULT_RING_MASK (511) + #define DEFAULT_BACKUP_RING_SIZE_FACTOR (16) +diff --git a/src/lstack/core/lstack_dpdk.c b/src/lstack/core/lstack_dpdk.c +index de87d48..560162f 100644 +--- a/src/lstack/core/lstack_dpdk.c ++++ b/src/lstack/core/lstack_dpdk.c +@@ -144,8 +144,7 @@ static struct rte_mempool *create_pktmbuf_mempool(const char *name, uint32_t nb_ + } + + /* time stamp before pbuf_custom as priv_data */ +- uint16_t private_size = sizeof(struct tcp_seg) + sizeof(struct pbuf_custom) + GAZELLE_MBUFF_PRIV_SIZE; +- private_size = RTE_ALIGN(private_size, RTE_CACHE_LINE_SIZE); ++ uint16_t private_size = RTE_ALIGN(MBUF_PRIVATE_SIZE, RTE_CACHE_LINE_SIZE); + pool = rte_pktmbuf_pool_create(pool_name, nb_mbuf, mbuf_cache_size, private_size, MBUF_SZ, rte_socket_id()); + if (pool == NULL) { + LSTACK_LOG(ERR, LSTACK, "cannot create %s pool rte_err=%d\n", pool_name, rte_errno); +diff --git a/src/lstack/core/lstack_stack_stat.c b/src/lstack/core/lstack_stack_stat.c +index 59c8e66..7243e82 100644 +--- 
a/src/lstack/core/lstack_stack_stat.c ++++ b/src/lstack/core/lstack_stack_stat.c +@@ -50,7 +50,7 @@ uint64_t get_current_time(void) + void calculate_lstack_latency(struct gazelle_stack_latency *stack_latency, const struct pbuf *pbuf, + enum GAZELLE_LATENCY_TYPE type) + { +- const uint64_t *priv = (uint64_t *)((uint8_t *)(pbuf) - GAZELLE_MBUFF_PRIV_SIZE); ++ const uint64_t *priv = (uint64_t *)((uint8_t *)(pbuf) + LATENCY_OFFSET); + if (*priv != ~(*(priv + 1)) || *priv < stack_latency->start_time) { + return; + } +diff --git a/src/ltran/ltran_ethdev.c b/src/ltran/ltran_ethdev.c +index 62a662d..e0c824a 100644 +--- a/src/ltran/ltran_ethdev.c ++++ b/src/ltran/ltran_ethdev.c +@@ -147,7 +147,8 @@ static struct rte_mempool *ltran_create_rx_mbuf_pool(uint32_t bond_port_index) + return NULL; + } + +- return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, GAZELLE_MBUFF_PRIV_SIZE, ++ uint16_t private_size = RTE_ALIGN(MBUF_PRIVATE_SIZE, RTE_CACHE_LINE_SIZE); ++ return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, private_size, + RTE_MBUF_DEFAULT_BUF_SIZE, (int32_t)rte_socket_id()); + } + +@@ -165,7 +166,8 @@ static struct rte_mempool *ltran_create_tx_mbuf_pool(uint32_t bond_port_index) + return NULL; + } + +- return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, GAZELLE_MBUFF_PRIV_SIZE, ++ uint16_t private_size = RTE_ALIGN(MBUF_PRIVATE_SIZE, RTE_CACHE_LINE_SIZE); ++ return rte_pktmbuf_pool_create(mbuf_pool_name, num_mbufs, GAZELLE_MBUFS_CACHE_SIZE, private_size, + RTE_MBUF_DEFAULT_BUF_SIZE, (int32_t)rte_socket_id()); + } + +-- +2.23.0 + diff --git a/0177-support-set-main-thread-affinity.patch b/0177-support-set-main-thread-affinity.patch new file mode 100644 index 0000000..ec2c786 --- /dev/null +++ b/0177-support-set-main-thread-affinity.patch @@ -0,0 +1,155 @@ +From c6d8e28b70d6ac86c1f8df6b94c179be4f8109f0 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Wed, 28 Dec 2022 17:05:57 +0800 
+Subject: [PATCH 2/3] support-set-main-thread-affinity + +--- + src/lstack/core/lstack_cfg.c | 18 ++++++++++++++++++ + src/lstack/core/lstack_init.c | 18 +++++++++++------- + src/lstack/core/lstack_protocol_stack.c | 4 +--- + src/lstack/include/lstack_cfg.h | 1 + + src/lstack/lstack.conf | 5 +++++ + 5 files changed, 36 insertions(+), 10 deletions(-) + +diff --git a/src/lstack/core/lstack_cfg.c b/src/lstack/core/lstack_cfg.c +index 55d16f9..4912fdd 100644 +--- a/src/lstack/core/lstack_cfg.c ++++ b/src/lstack/core/lstack_cfg.c +@@ -56,6 +56,7 @@ static int32_t parse_gateway_addr(void); + static int32_t parse_kni_switch(void); + static int32_t parse_listen_shadow(void); + static int32_t parse_app_bind_numa(void); ++static int32_t parse_main_thread_affinity(void); + static int32_t parse_unix_prefix(void); + static int32_t parse_rxtx_pool_size(void); + static int32_t parse_send_connect_number(void); +@@ -81,6 +82,7 @@ static struct config_vector_t g_config_tbl[] = { + { "kni_switch", parse_kni_switch }, + { "listen_shadow", parse_listen_shadow }, + { "app_bind_numa", parse_app_bind_numa }, ++ { "main_thread_affinity", parse_main_thread_affinity }, + { "unix_prefix", parse_unix_prefix }, + { "mbuf_pool_size", parse_rxtx_pool_size }, + { "send_connect_number", parse_send_connect_number }, +@@ -847,6 +849,22 @@ static int32_t parse_app_bind_numa(void) + return 0; + } + ++static int32_t parse_main_thread_affinity(void) ++{ ++ const config_setting_t *arg = NULL; ++ ++ arg = config_lookup(&g_config, "main_thread_affinity"); ++ if (arg == NULL) { ++ g_config_params.main_thread_affinity = false; ++ return 0; ++ } ++ ++ int32_t val = config_setting_get_int(arg); ++ g_config_params.main_thread_affinity = (val == 0) ? 
false : true; ++ ++ return 0; ++} ++ + static int32_t parse_kni_switch(void) + { + const config_setting_t *arg = NULL; +diff --git a/src/lstack/core/lstack_init.c b/src/lstack/core/lstack_init.c +index f647b8e..6347ab1 100644 +--- a/src/lstack/core/lstack_init.c ++++ b/src/lstack/core/lstack_init.c +@@ -223,18 +223,18 @@ static void create_control_thread(void) + + pthread_t tid; + if (use_ltran()) { ++ ret = pthread_create(&tid, NULL, (void *(*)(void *))control_client_thread, NULL); + dpdk_skip_nic_init(); + if (control_init_client(false) != 0) { + LSTACK_EXIT(1, "control_init_client failed\n"); + } +- ret = pthread_create(&tid, NULL, (void *(*)(void *))control_client_thread, NULL); + } else { ++ ret = pthread_create(&tid, NULL, (void *(*)(void *))control_server_thread, NULL); + ret = dpdk_eal_init(); + if (ret < 0) { + LSTACK_EXIT(1, "dpdk_eal_init failed ret=%d errno=%d\n", ret, errno); + } + +- ret = pthread_create(&tid, NULL, (void *(*)(void *))control_server_thread, NULL); + } + if (ret != 0) { + LSTACK_EXIT(1, "pthread_create failed ret=%d errno=%d\n", ret, errno); +@@ -295,9 +295,11 @@ __attribute__((constructor)) void gazelle_network_init(void) + + /* + * save initial affinity */ +- if (thread_affinity_default() < 0) { +- LSTACK_PRE_LOG(LSTACK_ERR, "pthread_getaffinity_np failed\n"); +- LSTACK_EXIT(1, "pthread_getaffinity_np failed\n"); ++ if (!get_global_cfg_params()->main_thread_affinity) { ++ if (thread_affinity_default() < 0) { ++ LSTACK_PRE_LOG(LSTACK_ERR, "pthread_getaffinity_np failed\n"); ++ LSTACK_EXIT(1, "pthread_getaffinity_np failed\n"); ++ } + } + + gazelle_signal_init(); +@@ -309,8 +311,10 @@ __attribute__((constructor)) void gazelle_network_init(void) + + /* + * cancel the core binding from DPDK initialization */ +- if (thread_affinity_default() < 0) { +- LSTACK_EXIT(1, "pthread_setaffinity_np failed\n"); ++ if (!get_global_cfg_params()->main_thread_affinity) { ++ if (thread_affinity_default() < 0) { ++ LSTACK_EXIT(1, "pthread_setaffinity_np 
failed\n"); ++ } + } + + lstack_log_level_init(); +diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c +index 9cc8946..ad9d026 100644 +--- a/src/lstack/core/lstack_protocol_stack.c ++++ b/src/lstack/core/lstack_protocol_stack.c +@@ -255,9 +255,7 @@ static void* gazelle_kernelevent_thread(void *arg) + uint16_t queue_id = *(uint16_t *)arg; + struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id]; + +- if (get_global_cfg_params()->app_bind_numa) { +- bind_to_stack_numa(stack); +- } ++ bind_to_stack_numa(stack); + + LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id); + +diff --git a/src/lstack/include/lstack_cfg.h b/src/lstack/include/lstack_cfg.h +index e33a484..bdaa083 100644 +--- a/src/lstack/include/lstack_cfg.h ++++ b/src/lstack/include/lstack_cfg.h +@@ -83,6 +83,7 @@ struct cfg_params { + bool kni_switch; + bool listen_shadow; // true:listen in all stack thread. false:listen in one stack thread. + bool app_bind_numa; ++ bool main_thread_affinity; + int dpdk_argc; + char **dpdk_argv; + struct secondary_attach_arg sec_attach_arg; +diff --git a/src/lstack/lstack.conf b/src/lstack/lstack.conf +index 47140ad..c27db22 100644 +--- a/src/lstack/lstack.conf ++++ b/src/lstack/lstack.conf +@@ -34,6 +34,11 @@ num_cpus="2" + #each cpu core start a wakeup thread. + #num_wakeup="3" + ++#app worker thread bind to numa in epoll/poll. ++app_bind_numa=1 ++#app main thread affinity set by dpdk. 
++main_thread_affinity=0 ++ + host_addr="192.168.1.10" + mask_addr="255.255.255.0" + gateway_addr="192.168.1.1" +-- +2.23.0 + diff --git a/0178-reduce-epoll-wakeup.patch b/0178-reduce-epoll-wakeup.patch new file mode 100644 index 0000000..397e341 --- /dev/null +++ b/0178-reduce-epoll-wakeup.patch @@ -0,0 +1,147 @@ +From 99f46a3e20d44ec8736becee896ed519971aab52 Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Wed, 28 Dec 2022 18:26:14 +0800 +Subject: [PATCH 3/3] reduce epoll wakeup + +--- + src/lstack/api/lstack_epoll.c | 28 +++++++++++-------------- + src/lstack/core/lstack_lwip.c | 1 + + src/lstack/core/lstack_protocol_stack.c | 8 +++---- + 3 files changed, 17 insertions(+), 20 deletions(-) + +diff --git a/src/lstack/api/lstack_epoll.c b/src/lstack/api/lstack_epoll.c +index 605984f..7860163 100644 +--- a/src/lstack/api/lstack_epoll.c ++++ b/src/lstack/api/lstack_epoll.c +@@ -186,7 +186,7 @@ int32_t lstack_do_epoll_create(int32_t fd) + GAZELLE_RETURN(EINVAL); + } + pthread_mutex_trylock(&wakeup->wait); +- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE); ++ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE); + + struct protocol_stack_group *stack_group = get_protocol_stack_group(); + init_list_node_null(&wakeup->poll_list); +@@ -473,6 +473,7 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe + } + + do { ++ __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE); + lwip_num = epoll_lwip_event(wakeup, events, maxevents); + wakeup->stat.app_events += lwip_num; + +@@ -484,11 +485,11 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe + } + + if (lwip_num + kernel_num > 0) { +- return lwip_num + kernel_num; ++ break; + } + + if (timeout == 0) { +- return 0; ++ break; + } + + if (timeout < 0) { +@@ -498,13 +499,10 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe + ms_to_timespec(&epoll_time, timeout); + ret = 
pthread_mutex_timedlock(&wakeup->wait, &epoll_time); + } +- +- if (ret == 0) { +- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE); +- } + } while (ret == 0); + +- return 0; ++ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE); ++ return lwip_num + kernel_num; + } + + static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup) +@@ -513,7 +511,7 @@ static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup) + GAZELLE_RETURN(EINVAL); + } + pthread_mutex_trylock(&wakeup->wait); +- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE); ++ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE); + + for (uint32_t i = 0; i < PROTOCOL_STACK_MAX; i++) { + init_list_node_null(&wakeup->wakeup_list[i]); +@@ -680,6 +678,7 @@ int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout) + int32_t ret; + + do { ++ __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE); + lwip_num = poll_lwip_event(fds, nfds); + + if (__atomic_load_n(&wakeup->have_kernel_event, __ATOMIC_ACQUIRE)) { +@@ -694,11 +693,11 @@ int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout) + } + + if (lwip_num + kernel_num > 0) { +- return lwip_num + kernel_num; ++ break; + } + + if (timeout == 0) { +- return 0; ++ break; + } + + if (timeout < 0) { +@@ -708,11 +707,8 @@ int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout) + ms_to_timespec(&epoll_time, timeout); + ret = pthread_mutex_timedlock(&wakeup->wait, &epoll_time); + } +- +- if (ret == 0) { +- __atomic_store_n(&wakeup->in_wait, true, __ATOMIC_RELEASE); +- } + } while (ret == 0); + +- return 0; ++ __atomic_store_n(&wakeup->in_wait, false, __ATOMIC_RELEASE); ++ return lwip_num + kernel_num; + } +diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c +index 31f87cf..01c7280 100644 +--- a/src/lstack/core/lstack_lwip.c ++++ b/src/lstack/core/lstack_lwip.c +@@ -598,6 +598,7 @@ void stack_send(struct rpc_msg *msg) + } + + __atomic_store_n(&sock->in_send, 0, 
__ATOMIC_RELEASE); ++ rte_mb(); + + /* have remain data or replenish again add sendlist */ + if (sock->errevent == 0 && NETCONN_IS_DATAOUT(sock)) { +diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c +index ad9d026..93204d1 100644 +--- a/src/lstack/core/lstack_protocol_stack.c ++++ b/src/lstack/core/lstack_protocol_stack.c +@@ -481,16 +481,16 @@ static void* gazelle_stack_thread(void *arg) + for (;;) { + poll_rpc_msg(stack, rpc_number); + ++ send_stack_list(stack, send_connect_number); ++ ++ stack_send_pkts(stack); ++ + stack_free_recv_pkts(stack, nic_read_number); + + gazelle_eth_dev_poll(stack, use_ltran_flag, nic_read_number); + + read_recv_list(stack, read_connect_number); + +- send_stack_list(stack, send_connect_number); +- +- stack_send_pkts(stack); +- + if ((wakeup_tick & 0xf) == 0) { + wakeup_kernel_event(stack); + wakeup_stack_epoll(stack, wakeup_thread_enable); +-- +2.23.0 + diff --git a/gazelle.spec b/gazelle.spec index 9b4b939..49b30aa 100644 --- a/gazelle.spec +++ b/gazelle.spec @@ -2,7 +2,7 @@ Name: gazelle Version: 1.0.1 -Release: 41 +Release: 42 Summary: gazelle is a high performance user-mode stack License: MulanPSL-2.0 URL: https://gitee.com/openeuler/gazelle @@ -190,6 +190,9 @@ Patch9172: 0172-fix-send-pkts-bluk-err.patch Patch9173: 0173-free-recv-pkts-bluks.patch Patch9174: 0174-fix-lstack-Makefile-warning.patch Patch9175: 0175-fix-null-pointer-deref-in-stack_broadcast_close.patch +Patch9176: 0176-pbuf-align-cache-line.patch +Patch9177: 0177-support-set-main-thread-affinity.patch +Patch9178: 0178-reduce-epoll-wakeup.patch %description %{name} is a high performance user-mode stack. 
@@ -230,6 +233,11 @@ install -Dpm 0640 %{_builddir}/%{name}-%{version}/src/ltran/ltran.conf %{b %config(noreplace) %{conf_path}/ltran.conf %changelog +* Wed Dec 28 2022 wuchangsheng - 1.0.1-42 +- pbuf cacheline align + support main thread affinity + reduce epoll wakeup + * Fri Dec 23 2022 kircher - 1.0.1-41 - fix null pointer deref in stack_broadcast_close - fix lstack Makefile warning