!97 [sync] PR-95: ceph客户端性能调优

From: @openeuler-sync-bot 
Reviewed-by: @wu-changsheng 
Signed-off-by: @wu-changsheng
This commit is contained in:
openeuler-ci-bot 2022-11-04 02:50:57 +00:00 committed by Gitee
commit a81111fd9e
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
8 changed files with 939 additions and 1 deletions

View File

@ -0,0 +1,96 @@
From f657eef24e947bb2d581599ddaf016b51ac349ae Mon Sep 17 00:00:00 2001
From: compile_success <980965867@qq.com>
Date: Sat, 22 Oct 2022 06:26:19 +0000
Subject: [PATCH 1/7] add writev and readv
---
src/lstack/api/lstack_wrap.c | 44 ++++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/src/lstack/api/lstack_wrap.c b/src/lstack/api/lstack_wrap.c
index 1c1dcdd..9672d3d 100644
--- a/src/lstack/api/lstack_wrap.c
+++ b/src/lstack/api/lstack_wrap.c
@@ -333,6 +333,20 @@ static inline ssize_t do_read(int32_t s, void *mem, size_t len)
return posix_api->read_fn(s, mem, len);
}
+static inline ssize_t do_readv(int32_t s, const struct iovec *iov, int iovcnt)
+{
+ struct msghdr msg;
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iov = LWIP_CONST_CAST(struct iovec *, iov);
+ msg.msg_iovlen = iovcnt;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+ return recvmsg_from_stack(s, &msg, 0);
+}
+
static inline ssize_t do_send(int32_t sockfd, const void *buf, size_t len, int32_t flags)
{
if (select_path(sockfd) != PATH_LWIP) {
@@ -351,6 +365,20 @@ static inline ssize_t do_write(int32_t s, const void *mem, size_t size)
return gazelle_send(s, mem, size, 0);
}
+static inline ssize_t do_writev(int32_t s, const struct iovec *iov, int iovcnt)
+{
+ struct msghdr msg;
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iov = LWIP_CONST_CAST(struct iovec *, iov);
+ msg.msg_iovlen = iovcnt;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+ return sendmsg_to_stack(s, &msg, 0);
+}
+
static inline ssize_t do_recvmsg(int32_t s, struct msghdr *message, int32_t flags)
{
if (message == NULL) {
@@ -526,14 +554,26 @@ ssize_t read(int32_t s, void *mem, size_t len)
{
return do_read(s, mem, len);
}
+ssize_t readv(int32_t s, const struct iovec *iov, int iovcnt)
+{
+ return do_readv(s, iov, iovcnt);
+}
ssize_t write(int32_t s, const void *mem, size_t size)
{
return do_write(s, mem, size);
}
+ssize_t writev(int32_t s, const struct iovec *iov, int iovcnt)
+{
+ return do_writev(s, iov, iovcnt);
+}
ssize_t __wrap_write(int32_t s, const void *mem, size_t size)
{
return do_write(s, mem, size);
}
+ssize_t __wrap_writev(int32_t s, const struct iovec *iov, int iovcnt)
+{
+ return do_writev(s, iov, iovcnt);
+}
ssize_t recv(int32_t sockfd, void *buf, size_t len, int32_t flags)
{
return do_recv(sockfd, buf, len, flags);
@@ -649,6 +689,10 @@ ssize_t __wrap_read(int32_t s, void *mem, size_t len)
{
return do_read(s, mem, len);
}
+ssize_t __wrap_readv(int32_t s, const struct iovec *iov, int iovcnt)
+{
+ return do_readv(s, iov, iovcnt);
+}
ssize_t __wrap_recv(int32_t sockfd, void *buf, size_t len, int32_t flags)
{
return do_recv(sockfd, buf, len, flags);
--
2.23.0

View File

@ -0,0 +1,150 @@
From 3ac0281accfc1cbaa0fa70ef8be7f706e56efe8f Mon Sep 17 00:00:00 2001
From: kircher <majun65@huawei.com>
Date: Fri, 28 Oct 2022 22:20:35 +0800
Subject: [PATCH 2/7] optimized some function in lstack
---
src/lstack/core/lstack_protocol_stack.c | 5 +--
src/lstack/core/lstack_thread_rpc.c | 4 +--
src/lstack/include/lstack_ethdev.h | 3 +-
src/lstack/netif/lstack_ethdev.c | 43 ++++++++++++++++++++++---
4 files changed, 45 insertions(+), 10 deletions(-)
diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c
index 79769f3..41dda89 100644
--- a/src/lstack/core/lstack_protocol_stack.c
+++ b/src/lstack/core/lstack_protocol_stack.c
@@ -419,6 +419,7 @@ static void wakeup_kernel_event(struct protocol_stack *stack)
static void* gazelle_stack_thread(void *arg)
{
uint16_t queue_id = *(uint16_t *)arg;
+ bool use_ltran_flag = use_ltran();
struct protocol_stack *stack = stack_thread_init(queue_id);
if (stack == NULL) {
@@ -435,7 +436,7 @@ static void* gazelle_stack_thread(void *arg)
for (;;) {
poll_rpc_msg(stack, HANDLE_RPC_MSG_MAX);
- eth_dev_poll();
+ gazelle_eth_dev_poll(stack, use_ltran_flag);
read_recv_list(stack, READ_LIST_MAX);
@@ -523,7 +524,7 @@ void stack_arp(struct rpc_msg *msg)
{
struct rte_mbuf *mbuf = (struct rte_mbuf *)msg->args[MSG_ARG_0].p;
- eth_dev_recv(mbuf);
+ eth_dev_recv(mbuf, NULL);
}
void stack_socket(struct rpc_msg *msg)
diff --git a/src/lstack/core/lstack_thread_rpc.c b/src/lstack/core/lstack_thread_rpc.c
index 46cbbe7..db1de5a 100644
--- a/src/lstack/core/lstack_thread_rpc.c
+++ b/src/lstack/core/lstack_thread_rpc.c
@@ -108,11 +108,9 @@ static inline __attribute__((always_inline)) int32_t rpc_sync_call(lockless_queu
void poll_rpc_msg(struct protocol_stack *stack, uint32_t max_num)
{
- uint32_t num;
struct rpc_msg *msg = NULL;
- num = 0;
- while (num++ < max_num) {
+ while (max_num--) {
lockless_queue_node *node = lockless_queue_mpsc_pop(&stack->rpc_queue);
if (node == NULL) {
break;
diff --git a/src/lstack/include/lstack_ethdev.h b/src/lstack/include/lstack_ethdev.h
index 91f5f13..a174978 100644
--- a/src/lstack/include/lstack_ethdev.h
+++ b/src/lstack/include/lstack_ethdev.h
@@ -22,6 +22,7 @@ struct eth_dev_ops {
int32_t ethdev_init(struct protocol_stack *stack);
int32_t eth_dev_poll(void);
-void eth_dev_recv(struct rte_mbuf *mbuf);
+int32_t gazelle_eth_dev_poll(struct protocol_stack *stack, bool use_ltran_flag);
+void eth_dev_recv(struct rte_mbuf *mbuf, struct protocol_stack *stack);
#endif /* __GAZELLE_ETHDEV_H__ */
diff --git a/src/lstack/netif/lstack_ethdev.c b/src/lstack/netif/lstack_ethdev.c
index 4757d72..5ddc0db 100644
--- a/src/lstack/netif/lstack_ethdev.c
+++ b/src/lstack/netif/lstack_ethdev.c
@@ -31,7 +31,7 @@
#define PKTMBUF_MALLOC_FLAG NULL
-void eth_dev_recv(struct rte_mbuf *mbuf)
+void eth_dev_recv(struct rte_mbuf *mbuf, struct protocol_stack *stack)
{
int32_t ret;
void *payload = NULL;
@@ -39,7 +39,9 @@ void eth_dev_recv(struct rte_mbuf *mbuf)
struct pbuf *prev = NULL;
struct pbuf *head = NULL;
struct pbuf_custom *pc = NULL;
- struct protocol_stack *stack = get_protocol_stack();
+ if (!stack) {
+ stack = get_protocol_stack();
+ }
struct rte_mbuf *m = mbuf;
uint16_t len, pkt_len;
@@ -88,7 +90,7 @@ int32_t eth_dev_poll(void)
nr_pkts = stack->dev_ops->rx_poll(stack, pkts, READ_PKTS_MAX);
if (nr_pkts == 0) {
- return nr_pkts;
+ return 0;
}
if (!use_ltran() && get_protocol_stack_group()->latency_start) {
@@ -105,7 +107,40 @@ int32_t eth_dev_poll(void)
}
}
- eth_dev_recv(pkts[i]);
+ eth_dev_recv(pkts[i], stack);
+ }
+
+ stack->stats.rx += nr_pkts;
+
+ return nr_pkts;
+}
+
+/* optimized eth_dev_poll() in lstack */
+int32_t gazelle_eth_dev_poll(struct protocol_stack *stack, bool use_ltran_flag)
+{
+ uint32_t nr_pkts;
+ struct rte_mbuf *pkts[READ_PKTS_MAX];
+
+ nr_pkts = stack->dev_ops->rx_poll(stack, pkts, READ_PKTS_MAX);
+ if (nr_pkts == 0) {
+ return 0;
+ }
+
+ if (!use_ltran_flag && get_protocol_stack_group()->latency_start) {
+ uint64_t time_stamp = get_current_time();
+ time_stamp_into_mbuf(nr_pkts, pkts, time_stamp);
+ }
+
+ for (uint32_t i = 0; i < nr_pkts; i++) {
+ /* copy arp into other stack */
+ if (!use_ltran_flag) {
+ struct rte_ether_hdr *ethh = rte_pktmbuf_mtod(pkts[i], struct rte_ether_hdr *);
+ if (unlikely(RTE_BE16(RTE_ETHER_TYPE_ARP) == ethh->ether_type)) {
+ stack_broadcast_arp(pkts[i], stack);
+ }
+ }
+
+ eth_dev_recv(pkts[i], stack);
}
stack->stats.rx += nr_pkts;
--
2.23.0

View File

@ -0,0 +1,47 @@
From 12f35b035e84c5085869ae3057733638452dcf7a Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Mon, 31 Oct 2022 17:23:49 +0800
Subject: [PATCH 3/7] fix gazellectl stats err when donot bind numa
---
src/lstack/api/lstack_epoll.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/lstack/api/lstack_epoll.c b/src/lstack/api/lstack_epoll.c
index 417d69c..6ac049c 100644
--- a/src/lstack/api/lstack_epoll.c
+++ b/src/lstack/api/lstack_epoll.c
@@ -172,8 +172,11 @@ int32_t lstack_do_epoll_create(int32_t fd)
wakeup->epollfd = fd;
sock->wakeup = wakeup;
- update_epoll_max_stack(wakeup);
- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack);
+ if (!get_global_cfg_params()->app_bind_numa) {
+ update_epoll_max_stack(wakeup);
+ change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack);
+ wakeup->bind_stack = wakeup->max_stack;
+ }
return fd;
}
@@ -505,10 +508,12 @@ static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup)
list_add_node(&stack_group->poll_list, &wakeup->poll_list);
pthread_spin_unlock(&stack_group->poll_list_lock);
- int32_t stack_count[PROTOCOL_STACK_MAX] = {0};
- uint16_t bind_id = find_max_cnt_stack(stack_count, stack_group->stack_num, wakeup->bind_stack);
- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]);
- wakeup->bind_stack = stack_group->stacks[bind_id];
+ if (!get_global_cfg_params()->app_bind_numa) {
+ int32_t stack_count[PROTOCOL_STACK_MAX] = {0};
+ uint16_t bind_id = find_max_cnt_stack(stack_count, stack_group->stack_num, wakeup->bind_stack);
+ change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]);
+ wakeup->bind_stack = stack_group->stacks[bind_id];
+ }
return 0;
}
--
2.23.0

View File

@ -0,0 +1,45 @@
From 701e207702945705c0f8a94babf09d1fe8c2b3da Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Mon, 31 Oct 2022 17:37:08 +0800
Subject: [PATCH 4/7] add usleep when write_ring is busy
---
src/lstack/core/lstack_lwip.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c
index 00afc75..3a1eb81 100644
--- a/src/lstack/core/lstack_lwip.c
+++ b/src/lstack/core/lstack_lwip.c
@@ -282,16 +282,11 @@ ssize_t write_stack_data(struct lwip_sock *sock, const void *buf, size_t len)
GAZELLE_RETURN(ENOTCONN);
}
- uint32_t free_count = gazelle_ring_readable_count(sock->send_ring);
- if (free_count == 0) {
- return 0;
- }
-
struct pbuf *pbuf = NULL;
ssize_t send_len = 0;
uint32_t send_pkt = 0;
- while (send_len < len && send_pkt < free_count) {
+ while (send_len < len) {
if (sock->send_lastdata) {
pbuf = sock->send_lastdata;
} else {
@@ -322,6 +317,10 @@ ssize_t write_stack_data(struct lwip_sock *sock, const void *buf, size_t len)
sock->wakeup->stat.app_write_cnt += send_pkt;
}
+ if (send_len == 0) {
+ /* 100: give up cpu to other threads, when send_ring is full */
+ usleep(100);
+ }
return send_len;
}
--
2.23.0

View File

@ -0,0 +1,432 @@
From ac2c6340d9e08b94bf99682e12ab5d5836d2140d Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Mon, 31 Oct 2022 22:07:26 +0800
Subject: [PATCH 5/7] optimize variable access
---
src/common/gazelle_opt.h | 1 +
src/lstack/api/lstack_wrap.c | 8 +-
src/lstack/core/lstack_lwip.c | 102 +++++++++------------
src/lstack/core/lstack_protocol_stack.c | 24 +++--
src/lstack/core/lstack_thread_rpc.c | 2 +
src/lstack/include/lstack_protocol_stack.h | 1 +
src/lstack/netif/lstack_ethdev.c | 5 +-
src/lstack/netif/lstack_vdev.c | 7 +-
8 files changed, 67 insertions(+), 83 deletions(-)
diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h
index 011553c..8ab40ed 100644
--- a/src/common/gazelle_opt.h
+++ b/src/common/gazelle_opt.h
@@ -31,6 +31,7 @@
#define GAZELLE_MBUFF_PRIV_SIZE (sizeof(uint64_t) * 2)
#define DEFAULT_RING_SIZE (512)
+#define DEFAULT_RING_MASK (511)
#define DEFAULT_BACKUP_RING_SIZE_FACTOR (16)
#define VDEV_RX_QUEUE_SZ DEFAULT_RING_SIZE
diff --git a/src/lstack/api/lstack_wrap.c b/src/lstack/api/lstack_wrap.c
index 9672d3d..1c7a722 100644
--- a/src/lstack/api/lstack_wrap.c
+++ b/src/lstack/api/lstack_wrap.c
@@ -46,6 +46,10 @@ bool select_thread_path(void);
static enum KERNEL_LWIP_PATH select_path(int fd)
{
+ if (!select_thread_path()) {
+ return PATH_KERNEL;
+ }
+
if (unlikely(posix_api == NULL)) {
/* posix api maybe call before gazelle init */
if (posix_api_init() != 0) {
@@ -54,10 +58,6 @@ static enum KERNEL_LWIP_PATH select_path(int fd)
return PATH_KERNEL;
}
- if (!select_thread_path()) {
- return PATH_KERNEL;
- }
-
if (unlikely(posix_api->ues_posix)) {
return PATH_KERNEL;
}
diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c
index 3a1eb81..f924ee7 100644
--- a/src/lstack/core/lstack_lwip.c
+++ b/src/lstack/core/lstack_lwip.c
@@ -37,8 +37,6 @@
#define HALF_DIVISOR (2)
#define USED_IDLE_WATERMARK (VDEV_IDLE_QUEUE_SZ >> 2)
-static int32_t lwip_alloc_pbufs(pbuf_layer layer, uint16_t length, pbuf_type type, void **pbufs, uint32_t num);
-
static void free_ring_pbuf(struct rte_ring *ring)
{
void *pbufs[SOCK_RECV_RING_SIZE];
@@ -85,17 +83,38 @@ static void reset_sock_data(struct lwip_sock *sock)
sock->recv_lastdata = NULL;
}
-static void replenish_send_idlembuf(struct rte_ring *ring)
+static struct pbuf *init_mbuf_to_pbuf(struct rte_mbuf *mbuf, pbuf_layer layer, uint16_t length, pbuf_type type)
+{
+ struct pbuf_custom *pbuf_custom = mbuf_to_pbuf(mbuf);
+ pbuf_custom->custom_free_function = gazelle_free_pbuf;
+
+ void *data = rte_pktmbuf_mtod(mbuf, void *);
+ struct pbuf *pbuf = pbuf_alloced_custom(layer, length, type, pbuf_custom, data, MAX_PACKET_SZ);
+ if (pbuf) {
+ pbuf->ol_flags = 0;
+ pbuf->l2_len = 0;
+ pbuf->l3_len = 0;
+ }
+
+ return pbuf;
+}
+
+static void replenish_send_idlembuf(struct protocol_stack *stack, struct rte_ring *ring)
{
void *pbuf[SOCK_SEND_RING_SIZE];
uint32_t replenish_cnt = gazelle_ring_free_count(ring);
uint32_t alloc_num = LWIP_MIN(replenish_cnt, RING_SIZE(SOCK_SEND_RING_SIZE));
- if (lwip_alloc_pbufs(PBUF_TRANSPORT, TCP_MSS, PBUF_RAM, (void **)pbuf, alloc_num) != 0) {
+ if (rte_pktmbuf_alloc_bulk(stack->tx_pktmbuf_pool, (struct rte_mbuf **)pbuf, alloc_num) != 0) {
+ stack->stats.tx_allocmbuf_fail++;
return;
}
+ for (uint32_t i = 0; i < alloc_num; i++) {
+ pbuf[i] = init_mbuf_to_pbuf(pbuf[i], PBUF_TRANSPORT, TCP_MSS, PBUF_RAM);
+ }
+
uint32_t num = gazelle_ring_sp_enqueue(ring, pbuf, alloc_num);
for (uint32_t i = num; i < alloc_num; i++) {
pbuf_free(pbuf[i]);
@@ -126,7 +145,7 @@ void gazelle_init_sock(int32_t fd)
LSTACK_LOG(ERR, LSTACK, "sock_send create failed. errno: %d.\n", rte_errno);
return;
}
- replenish_send_idlembuf(sock->send_ring);
+ replenish_send_idlembuf(stack, sock->send_ring);
sock->stack = stack;
sock->stack->conn_num++;
@@ -183,46 +202,17 @@ int32_t gazelle_alloc_pktmbuf(struct rte_mempool *pool, struct rte_mbuf **mbufs,
return 0;
}
-static struct pbuf *init_mbuf_to_pbuf(struct rte_mbuf *mbuf, pbuf_layer layer, uint16_t length, pbuf_type type)
-{
- struct pbuf_custom *pbuf_custom = mbuf_to_pbuf(mbuf);
- pbuf_custom->custom_free_function = gazelle_free_pbuf;
-
- void *data = rte_pktmbuf_mtod(mbuf, void *);
- struct pbuf *pbuf = pbuf_alloced_custom(layer, length, type, pbuf_custom, data, MAX_PACKET_SZ);
- if (pbuf) {
- pbuf->ol_flags = 0;
- pbuf->l2_len = 0;
- pbuf->l3_len = 0;
- }
-
- return pbuf;
-}
-
-static int32_t lwip_alloc_pbufs(pbuf_layer layer, uint16_t length, pbuf_type type, void **bufs, uint32_t num)
-{
- int32_t ret = rte_pktmbuf_alloc_bulk(get_protocol_stack()->tx_pktmbuf_pool, (struct rte_mbuf **)bufs, num);
- if (ret != 0) {
- get_protocol_stack()->stats.tx_allocmbuf_fail++;
- return -1;
- }
-
- for (uint32_t i = 0; i < num; i++) {
- bufs[i] = init_mbuf_to_pbuf(bufs[i], layer, length, type);
- }
-
- return 0;
-}
-
struct pbuf *lwip_alloc_pbuf(pbuf_layer layer, uint16_t length, pbuf_type type)
{
- struct pbuf *pbuf;
+ struct rte_mbuf *mbuf;
+ struct protocol_stack *stack = get_protocol_stack();
- if (lwip_alloc_pbufs(layer, length, type, (void **)&pbuf, 1) != 0) {
+ if (rte_pktmbuf_alloc_bulk(stack->tx_pktmbuf_pool, &mbuf, 1) != 0) {
+ stack->stats.tx_allocmbuf_fail++;
return NULL;
}
- return pbuf;
+ return init_mbuf_to_pbuf(mbuf, layer, length, type);
}
struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags)
@@ -324,7 +314,7 @@ ssize_t write_stack_data(struct lwip_sock *sock, const void *buf, size_t len)
return send_len;
}
-static void do_lwip_send(int32_t fd, struct lwip_sock *sock, int32_t flags)
+static void do_lwip_send(struct protocol_stack *stack, int32_t fd, struct lwip_sock *sock, int32_t flags)
{
/* send all send_ring, so len set lwip send max. */
ssize_t len = lwip_send(fd, sock, UINT16_MAX, flags);
@@ -335,7 +325,7 @@ static void do_lwip_send(int32_t fd, struct lwip_sock *sock, int32_t flags)
}
if (gazelle_ring_readable_count(sock->send_ring) < SOCK_SEND_REPLENISH_THRES) {
- replenish_send_idlembuf(sock->send_ring);
+ replenish_send_idlembuf(stack, sock->send_ring);
}
if ((sock->epoll_events & EPOLLOUT) && NETCONN_IS_OUTIDLE(sock)) {
@@ -347,8 +337,7 @@ void stack_send(struct rpc_msg *msg)
{
int32_t fd = msg->args[MSG_ARG_0].i;
int32_t flags = msg->args[MSG_ARG_2].i;
-
- struct protocol_stack *stack = get_protocol_stack();
+ struct protocol_stack *stack = (struct protocol_stack *)msg->args[MSG_ARG_3].p;
struct lwip_sock *sock = get_socket(fd);
if (sock == NULL) {
@@ -363,7 +352,7 @@ void stack_send(struct rpc_msg *msg)
return;
}
- do_lwip_send(fd, sock, flags);
+ do_lwip_send(stack, fd, sock, flags);
/* have remain data add sendlist */
if (NETCONN_IS_DATAOUT(sock)) {
@@ -392,7 +381,7 @@ void send_stack_list(struct protocol_stack *stack, uint32_t send_max)
continue;
}
- do_lwip_send(sock->conn->socket, sock, 0);
+ do_lwip_send(stack, sock->conn->socket, sock, 0);
if (!NETCONN_IS_DATAOUT(sock)) {
list_del_node_null(&sock->send_list);
@@ -542,11 +531,7 @@ ssize_t gazelle_send(int32_t fd, const void *buf, size_t len, int32_t flags)
return 0;
}
- struct lwip_sock *sock = get_socket(fd);
- if (sock == NULL) {
- GAZELLE_RETURN(EINVAL);
- }
-
+ struct lwip_sock *sock = get_socket_by_fd(fd);
ssize_t send = write_stack_data(sock, buf, len);
if (send <= 0) {
return send;
@@ -562,11 +547,7 @@ ssize_t sendmsg_to_stack(int32_t s, const struct msghdr *message, int32_t flags)
int32_t ret;
int32_t i;
ssize_t buflen = 0;
-
- struct lwip_sock *sock = get_socket(s);
- if (sock == NULL) {
- GAZELLE_RETURN(EINVAL);
- }
+ struct lwip_sock *sock = get_socket_by_fd(s);
if (check_msg_vaild(message)) {
GAZELLE_RETURN(EINVAL);
@@ -635,17 +616,16 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
struct pbuf *pbuf = NULL;
ssize_t recvd = 0;
uint16_t copy_len;
-
- struct lwip_sock *sock = get_socket(fd);
- if (sock == NULL) {
- LSTACK_LOG(ERR, LSTACK, "get_socket null fd %d.\n", fd);
- GAZELLE_RETURN(EINVAL);
- }
+ struct lwip_sock *sock = get_socket_by_fd(fd);
if (sock->errevent > 0 && !NETCONN_IS_DATAIN(sock)) {
return 0;
}
+ if (recv_left > UINT16_MAX) {
+ recv_left = UINT16_MAX;
+ }
+
while (recv_left > 0) {
if (sock->recv_lastdata) {
pbuf = sock->recv_lastdata;
diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c
index 41dda89..2759d7d 100644
--- a/src/lstack/core/lstack_protocol_stack.c
+++ b/src/lstack/core/lstack_protocol_stack.c
@@ -36,9 +36,9 @@
#include "posix/lstack_epoll.h"
#include "lstack_stack_stat.h"
-#define READ_LIST_MAX 128
-#define SEND_LIST_MAX 128
-#define HANDLE_RPC_MSG_MAX 128
+#define READ_LIST_MAX 32
+#define SEND_LIST_MAX 32
+#define HANDLE_RPC_MSG_MAX 32
#define KERNEL_EVENT_100us 100
static PER_THREAD struct protocol_stack *g_stack_p = NULL;
@@ -164,7 +164,7 @@ void low_power_idling(struct protocol_stack *stack)
last_cycle_ts = sys_now();
}
- uint64_t now_pkts = get_protocol_stack()->stats.rx;
+ uint64_t now_pkts = stack->stats.rx;
uint32_t now_ts = sys_now();
if (((now_ts - last_cycle_ts) > LSTACK_LPM_DETECT_MS) ||
((now_pkts - last_cycle_pkts) >= LSTACK_LPM_PKTS_IN_DETECT)) {
@@ -258,7 +258,9 @@ static void* gazelle_kernelevent_thread(void *arg)
uint16_t queue_id = *(uint16_t *)arg;
struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id];
- bind_to_stack_numa(stack);
+ if (get_global_cfg_params()->app_bind_numa) {
+ bind_to_stack_numa(stack);
+ }
LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id);
@@ -420,6 +422,7 @@ static void* gazelle_stack_thread(void *arg)
{
uint16_t queue_id = *(uint16_t *)arg;
bool use_ltran_flag = use_ltran();
+ uint32_t wakeup_tick = 0;
struct protocol_stack *stack = stack_thread_init(queue_id);
if (stack == NULL) {
@@ -442,9 +445,11 @@ static void* gazelle_stack_thread(void *arg)
send_stack_list(stack, SEND_LIST_MAX);
- wakeup_kernel_event(stack);
-
- wakeup_stack_epoll(stack);
+ if ((wakeup_tick & 0xf) == 0) {
+ wakeup_kernel_event(stack);
+ wakeup_stack_epoll(stack);
+ }
+ wakeup_tick++;
sys_timer_run();
@@ -523,8 +528,9 @@ int32_t init_protocol_stack(void)
void stack_arp(struct rpc_msg *msg)
{
struct rte_mbuf *mbuf = (struct rte_mbuf *)msg->args[MSG_ARG_0].p;
+ struct protocol_stack *stack = (struct protocol_stack*)msg->args[MSG_ARG_1].p;
- eth_dev_recv(mbuf, NULL);
+ eth_dev_recv(mbuf, stack);
}
void stack_socket(struct rpc_msg *msg)
diff --git a/src/lstack/core/lstack_thread_rpc.c b/src/lstack/core/lstack_thread_rpc.c
index db1de5a..295baf3 100644
--- a/src/lstack/core/lstack_thread_rpc.c
+++ b/src/lstack/core/lstack_thread_rpc.c
@@ -234,6 +234,7 @@ int32_t rpc_call_arp(struct protocol_stack *stack, struct rte_mbuf *mbuf)
msg->self_release = 0;
msg->args[MSG_ARG_0].p = mbuf;
+ msg->args[MSG_ARG_1].p = stack;
rpc_call(&stack->rpc_queue, msg);
@@ -451,6 +452,7 @@ int32_t rpc_call_send(int fd, const void *buf, size_t len, int flags)
msg->args[MSG_ARG_0].i = fd;
msg->args[MSG_ARG_1].size = len;
msg->args[MSG_ARG_2].i = flags;
+ msg->args[MSG_ARG_3].p = stack;
msg->self_release = 0;
rpc_call(&stack->rpc_queue, msg);
diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h
index cc2cfb9..fed1882 100644
--- a/src/lstack/include/lstack_protocol_stack.h
+++ b/src/lstack/include/lstack_protocol_stack.h
@@ -49,6 +49,7 @@ struct protocol_stack {
struct rte_ring *reg_ring;
struct rte_ring *wakeup_ring;
struct reg_ring_msg *reg_buf;
+ uint32_t reg_head;
volatile bool low_power;
lockless_queue rpc_queue __rte_cache_aligned;
diff --git a/src/lstack/netif/lstack_ethdev.c b/src/lstack/netif/lstack_ethdev.c
index 5ddc0db..3abed5e 100644
--- a/src/lstack/netif/lstack_ethdev.c
+++ b/src/lstack/netif/lstack_ethdev.c
@@ -39,9 +39,6 @@ void eth_dev_recv(struct rte_mbuf *mbuf, struct protocol_stack *stack)
struct pbuf *prev = NULL;
struct pbuf *head = NULL;
struct pbuf_custom *pc = NULL;
- if (!stack) {
- stack = get_protocol_stack();
- }
struct rte_mbuf *m = mbuf;
uint16_t len, pkt_len;
@@ -81,7 +78,7 @@ void eth_dev_recv(struct rte_mbuf *mbuf, struct protocol_stack *stack)
}
}
-#define READ_PKTS_MAX 128
+#define READ_PKTS_MAX 32
int32_t eth_dev_poll(void)
{
uint32_t nr_pkts;
diff --git a/src/lstack/netif/lstack_vdev.c b/src/lstack/netif/lstack_vdev.c
index f9fa5a3..1c148e1 100644
--- a/src/lstack/netif/lstack_vdev.c
+++ b/src/lstack/netif/lstack_vdev.c
@@ -113,7 +113,6 @@ int32_t vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple)
uint32_t sent_pkts = 0;
void *free_buf[VDEV_REG_QUEUE_SZ];
struct reg_ring_msg *tmp_buf = NULL;
- static PER_THREAD uint32_t head = 0;
const uint32_t tbegin = sys_now();
struct protocol_stack *stack = get_protocol_stack();
@@ -124,6 +123,7 @@ int32_t vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple)
}
}
+ uint32_t reg_index = stack->reg_head++ & DEFAULT_RING_MASK;
do {
(void)gazelle_ring_sc_dequeue(stack->reg_ring, free_buf, VDEV_REG_QUEUE_SZ);
@@ -131,7 +131,7 @@ int32_t vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple)
continue;
}
- tmp_buf = &stack->reg_buf[head];
+ tmp_buf = &stack->reg_buf[reg_index];
tmp_buf->type = type;
tmp_buf->tid = get_stack_tid();
ret = memcpy_s(&tmp_buf->qtuple, sizeof(*qtuple), qtuple, sizeof(struct gazelle_quintuple));
@@ -144,9 +144,6 @@ int32_t vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple)
sent_pkts = gazelle_ring_sp_enqueue(stack->reg_ring, free_buf, 1);
} while ((sent_pkts < 1) && (ENQUEUE_RING_RETRY_TIMEOUT > sys_now() - tbegin) && get_register_state());
- if (sent_pkts == 1) {
- head = (head + 1) % VDEV_REG_QUEUE_SZ;
- }
return (int32_t)sent_pkts;
}
--
2.23.0

133
0117-add-gro.patch Normal file
View File

@ -0,0 +1,133 @@
From b485a70ba6a68f10e4958843648400d3caaa4837 Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Tue, 1 Nov 2022 10:51:13 +0800
Subject: [PATCH 6/7] add gro
---
src/lstack/Makefile | 1 +
src/lstack/core/lstack_lwip.c | 10 ++++++----
src/lstack/netif/lstack_ethdev.c | 5 ++++-
src/lstack/netif/lstack_vdev.c | 23 ++++++++++++++++++++++-
4 files changed, 33 insertions(+), 6 deletions(-)
diff --git a/src/lstack/Makefile b/src/lstack/Makefile
index 7ce35d4..cb7be16 100644
--- a/src/lstack/Makefile
+++ b/src/lstack/Makefile
@@ -65,6 +65,7 @@ LIBRTE_LIB = $(LIB_PATH)/librte_bus_pci.so \
$(LIB_PATH)/librte_mempool_ring.so \
$(LIB_PATH)/librte_timer.so \
$(LIB_PATH)/librte_eal.so \
+ $(LIB_PATH)/librte_gro.so \
$(LIB_PATH)/librte_ring.so \
$(LIB_PATH)/librte_mbuf.so \
$(LIB_PATH)/librte_telemetry.so \
diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c
index f924ee7..52b4624 100644
--- a/src/lstack/core/lstack_lwip.c
+++ b/src/lstack/core/lstack_lwip.c
@@ -596,17 +596,19 @@ static inline void del_data_in_event(struct lwip_sock *sock)
static struct pbuf *pbuf_free_partial(struct pbuf *pbuf, uint16_t free_len)
{
+ uint16_t tot_len = pbuf->tot_len - free_len;
+
while (free_len && pbuf) {
if (free_len >= pbuf->len) {
- struct pbuf *p = pbuf;
+ free_len = free_len - pbuf->len;
pbuf = pbuf->next;
- free_len = free_len - p->len;
} else {
pbuf_remove_header(pbuf, free_len);
break;
}
}
+ pbuf->tot_len = tot_len;
return pbuf;
}
@@ -636,13 +638,13 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
}
}
- copy_len = (recv_left > pbuf->len) ? pbuf->len : (uint16_t)recv_left;
+ copy_len = (recv_left > pbuf->tot_len) ? pbuf->tot_len : (uint16_t)recv_left;
pbuf_copy_partial(pbuf, (char *)buf + recvd, copy_len, 0);
recvd += copy_len;
recv_left -= copy_len;
- if (pbuf->len > copy_len || pbuf->next) {
+ if (pbuf->tot_len > copy_len) {
sock->recv_lastdata = pbuf_free_partial(pbuf, copy_len);
} else {
if (sock->wakeup) {
diff --git a/src/lstack/netif/lstack_ethdev.c b/src/lstack/netif/lstack_ethdev.c
index 3abed5e..1c35919 100644
--- a/src/lstack/netif/lstack_ethdev.c
+++ b/src/lstack/netif/lstack_ethdev.c
@@ -41,6 +41,7 @@ void eth_dev_recv(struct rte_mbuf *mbuf, struct protocol_stack *stack)
struct pbuf_custom *pc = NULL;
struct rte_mbuf *m = mbuf;
uint16_t len, pkt_len;
+ struct rte_mbuf *next_m = NULL;
pkt_len = (uint16_t)rte_pktmbuf_pkt_len(m);
while (m != NULL) {
@@ -66,7 +67,9 @@ void eth_dev_recv(struct rte_mbuf *mbuf, struct protocol_stack *stack)
}
prev = next;
- m = m->next;
+ next_m = m->next;
+ m->next = NULL;
+ m = next_m;
}
if (head != NULL) {
diff --git a/src/lstack/netif/lstack_vdev.c b/src/lstack/netif/lstack_vdev.c
index 1c148e1..8df0c5e 100644
--- a/src/lstack/netif/lstack_vdev.c
+++ b/src/lstack/netif/lstack_vdev.c
@@ -18,6 +18,8 @@
#include <rte_ring.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
+#include <rte_gro.h>
+#include <rte_net.h>
#include "lstack_cfg.h"
#include "lstack_dpdk.h"
@@ -63,7 +65,26 @@ static uint32_t ltran_rx_poll(struct protocol_stack *stack, struct rte_mbuf **pk
static uint32_t vdev_rx_poll(struct protocol_stack *stack, struct rte_mbuf **pkts, uint32_t max_mbuf)
{
- return rte_eth_rx_burst(stack->port_id, stack->queue_id, pkts, max_mbuf);
+ struct rte_gro_param gro_param = {
+ .gro_types = RTE_GRO_TCP_IPV4,
+ /* 8*16=128(max) */
+ .max_flow_num = 8,
+ .max_item_per_flow = 16,
+ };
+
+ uint32_t pkt_num = rte_eth_rx_burst(stack->port_id, stack->queue_id, pkts, max_mbuf);
+ if (pkt_num <= 1) {
+ return pkt_num;
+ }
+
+ for (uint32_t i = 0; i < pkt_num; i++) {
+ struct rte_net_hdr_lens hdr_lens;
+ pkts[i]->packet_type = rte_net_get_ptype(pkts[i], &hdr_lens, RTE_PTYPE_ALL_MASK);
+ pkts[i]->l2_len = hdr_lens.l2_len;
+ pkts[i]->l3_len = hdr_lens.l3_len;
+ pkts[i]->l4_len = hdr_lens.l4_len;
+ }
+ return rte_gro_reassemble_burst(pkts, pkt_num, &gro_param);
}
static uint32_t ltran_tx_xmit(struct protocol_stack *stack, struct rte_mbuf **pkts, uint32_t nr_pkts)
--
2.23.0

View File

@ -0,0 +1,25 @@
From 6ccad03ae3ec736034e4ff403c9d2ef26516ef4a Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Tue, 1 Nov 2022 11:42:33 +0800
Subject: [PATCH 7/7] expand nic rx desc size
---
src/common/gazelle_opt.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h
index 8ab40ed..7521e8c 100644
--- a/src/common/gazelle_opt.h
+++ b/src/common/gazelle_opt.h
@@ -45,7 +45,7 @@
#define FREE_RX_QUEUE_SZ DPDK_PKT_BURST_SIZE
#define RTE_TEST_TX_DESC_DEFAULT 512
-#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_RX_DESC_DEFAULT 4096
#define DPDK_PKT_BURST_SIZE 512
--
2.23.0

View File

@ -2,7 +2,7 @@
Name: gazelle
Version: 1.0.1
Release: 16
Release: 17
Summary: gazelle is a high performance user-mode stack
License: MulanPSL-2.0
URL: https://gitee.com/openeuler/gazelle
@ -126,6 +126,13 @@ Patch9108: 0108-avoid-useless-stack-check-wakeup-event.patch
Patch9109: 0109-fix-mesg-loss.patch
Patch9110: 0110-add-accept4-and-epoll_create1.patch
Patch9111: 0111-refactor-event-notice.patch
Patch9112: 0112-add-writev-and-readv.patch
Patch9113: 0113-optimized-some-function-in-lstack.patch
Patch9114: 0114-fix-gazellectl-stats-err-when-donot-bind-numa.patch
Patch9115: 0115-add-usleep-when-write_ring-is-busy.patch
Patch9116: 0116-optimize-variable-access.patch
Patch9117: 0117-add-gro.patch
Patch9118: 0118-expand-nic-rx-desc-size.patch
%description
%{name} is a high performance user-mode stack.
@ -166,6 +173,9 @@ install -Dpm 0640 %{_builddir}/%{name}-%{version}/src/ltran/ltran.conf %{b
%config(noreplace) %{conf_path}/ltran.conf
%changelog
* Fri Nov 04 2022 wuchangsheng <wuchangsheng2@huawei.com> - 1.0.1-17
- Optimize ceph client performance
* Sat Oct 08 2022 wuchangsheng <wuchangsheng2@huawei.com> - 1.0.1-16
- refactor event
adapt for ceph client