update README
fix some bugs
refactor pkt read send to improve performance
refactor kernel event to improve performance
(cherry picked from commit a8c66704608ca83c799adab88be6214bccdcfa44)
From f0e65d55ace8b4e5c1bd2023f1b62da181f421c5 Mon Sep 17 00:00:00 2001
From: jiangheng14 <jiangheng14@huawei.com>
Date: Thu, 7 Jul 2022 21:31:20 +0800
Subject: [PATCH 6/6] refactor-pkt-read-send-performance

---
 src/common/dpdk_common.h | 145 +++++++
 src/common/gazelle_dfx_msg.h | 50 ++-
 src/lstack/api/lstack_epoll.c | 321 ++++++--------
 src/lstack/api/lstack_wrap.c | 15 +-
 src/lstack/core/lstack_cfg.c | 18 +
 src/lstack/core/lstack_control_plane.c | 4 +-
 src/lstack/core/lstack_dpdk.c | 32 +-
 src/lstack/core/lstack_init.c | 5 +-
 src/lstack/core/lstack_lwip.c | 477 ++++++++++-----------
 src/lstack/core/lstack_protocol_stack.c | 276 ++++++------
 src/lstack/core/lstack_stack_stat.c | 80 +++-
 src/lstack/core/lstack_thread_rpc.c | 85 ++--
 src/lstack/include/lstack_cfg.h | 1 +
 src/lstack/include/lstack_dpdk.h | 5 +-
 src/lstack/include/lstack_lockless_queue.h | 10 +-
 src/lstack/include/lstack_lwip.h | 11 +-
 src/lstack/include/lstack_protocol_stack.h | 31 +-
 src/lstack/include/lstack_stack_stat.h | 3 +
 src/lstack/include/lstack_thread_rpc.h | 5 +-
 src/lstack/include/posix/lstack_epoll.h | 22 +-
 src/lstack/lstack.conf | 1 +
 src/lstack/netif/lstack_ethdev.c | 9 +-
 src/lstack/netif/lstack_vdev.c | 14 +-
 src/ltran/ltran_dfx.c | 52 ++-
 src/ltran/ltran_forward.c | 18 +-
 src/ltran/ltran_stat.c | 19 +-
 26 files changed, 911 insertions(+), 798 deletions(-)

diff --git a/src/common/dpdk_common.h b/src/common/dpdk_common.h
index 595e85f..4a7bd37 100644
--- a/src/common/dpdk_common.h
+++ b/src/common/dpdk_common.h
@@ -14,6 +14,7 @@
 #define __GAZELLE_DPDK_COMMON_H__
 
 #include <rte_mbuf.h>
+#include <rte_ring.h>
 
 #define GAZELLE_KNI_NAME "kni" // will be removed during dpdk update
 
@@ -35,6 +36,7 @@ static __rte_always_inline void copy_mbuf(struct rte_mbuf *dst, struct rte_mbuf
         return;
 
     dst->ol_flags = src->ol_flags;
+    dst->tx_offload = src->tx_offload;
     // there is buf_len in rx_descriptor_fields1, so copying it is actually dangerous. 16 : mbuf desc size
     rte_memcpy((uint8_t *)dst->rx_descriptor_fields1, (const uint8_t *)src->rx_descriptor_fields1, 16);
 
@@ -65,4 +67,147 @@ int32_t dpdk_kni_init(uint16_t port, struct rte_mempool *pool);
 int32_t kni_process_tx(struct rte_mbuf **pkts_burst, uint32_t count);
 void kni_process_rx(uint16_t port);
 
+/*
+   gazelle custom rte ring interface
+   lightweight ring that reduces atomics and smp_mb.
+   only supports a single producer and a single consumer.
+ */
+static __rte_always_inline uint32_t gazelle_light_ring_enqueue_busrt(struct rte_ring *r, void **obj_table, uint32_t n)
+{
+    uint32_t cons = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);
+    uint32_t prod = r->prod.tail;
+    uint32_t free_entries = r->capacity + cons - prod;
+
+    if (n > free_entries) {
+        return 0;
+    }
+
+    __rte_ring_enqueue_elems(r, prod, obj_table, sizeof(void *), n);
+
+    __atomic_store_n(&r->prod.tail, prod + n, __ATOMIC_RELEASE);
+
+    return n;
+}
+
+static __rte_always_inline uint32_t gazelle_light_ring_dequeue_burst(struct rte_ring *r, void **obj_table, uint32_t n)
+{
+    uint32_t prod = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);
+    uint32_t cons = r->cons.tail;
+    uint32_t entries = prod - cons;
+
+    if (n > entries) {
+        n = entries;
+    }
+
+    if (n == 0) {
+        return 0;
+    }
+
+    __rte_ring_dequeue_elems(r, cons, obj_table, sizeof(void *), n);
+
+    __atomic_store_n(&r->cons.tail, cons + n, __ATOMIC_RELEASE);
+
+    return n;
+}
+
+/*
+   gazelle custom rte ring interface
+   one thread enqueues and dequeues; another thread reads objects while they stay in the queue,
+   so malloc and free happen in the same thread. only supports a single producer and a single consumer.
+
+   cons.tail                prod.tail                prod.head                cons.head
+   gazelle_ring_sp_enqueue: cons.head -->> cons.tail, enqueue object
+   gazelle_ring_sc_dequeue: cons.tail -->> prod.tail, dequeue object
+   gazelle_ring_read:       prod.tail -->> cons.head, read object, prod.head = prod.tail + N
+   gazelle_ring_read_over:  prod.tail = prod.head, update prod.tail
+ */
+static __rte_always_inline uint32_t gazelle_ring_sp_enqueue(struct rte_ring *r, void **obj_table, uint32_t n)
+{
+    uint32_t head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE);
+    uint32_t tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);
+
+    uint32_t entries = r->capacity + tail - head;
+    if (n > entries) {
+        return 0;
+    }
+
+
+    __rte_ring_enqueue_elems(r, head, obj_table, sizeof(void *), n);
+
+    __atomic_store_n(&r->cons.head, head + n, __ATOMIC_RELEASE);
+
+    return n;
+}
+
+static __rte_always_inline uint32_t gazelle_ring_sc_dequeue(struct rte_ring *r, void **obj_table, uint32_t n)
+{
+    uint32_t cons = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);
+    uint32_t prod = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);
+
+    uint32_t entries = prod - cons;
+    if (n > entries) {
+        n = entries;
+    }
+    if (unlikely(n == 0)) {
+        return 0;
+    }
+
+
+    __rte_ring_dequeue_elems(r, cons, obj_table, sizeof(void *), n);
+
+    __atomic_store_n(&r->cons.tail, cons + n, __ATOMIC_RELEASE);
+
+    return n;
+}
+
+static __rte_always_inline uint32_t gazelle_ring_read(struct rte_ring *r, void **obj_table, uint32_t n)
+{
+    uint32_t cons = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE);
+    uint32_t prod = r->prod.head;
+
+    const uint32_t entries = cons - prod;
+    if (n > entries) {
+        n = entries;
+    }
+    if (unlikely(n == 0)) {
+        return 0;
+    }
+
+    __rte_ring_dequeue_elems(r, prod, obj_table, sizeof(void *), n);
+
+    r->prod.head = prod + n;
+
+    return n;
+}
+
+static __rte_always_inline void gazelle_ring_read_n(struct rte_ring *r, uint32_t n)
+{
+    __atomic_store_n(&r->prod.tail, r->prod.tail + n, __ATOMIC_RELEASE);
+}
+
+static __rte_always_inline void gazelle_ring_read_over(struct rte_ring *r)
+{
+    __atomic_store_n(&r->prod.tail, r->prod.head, __ATOMIC_RELEASE);
+}
+
+static __rte_always_inline uint32_t gazelle_ring_readover_count(struct rte_ring *r)
+{
+    rte_smp_rmb();
+    return r->prod.tail - r->cons.tail;
+}
+static __rte_always_inline uint32_t gazelle_ring_readable_count(const struct rte_ring *r)
+{
+    rte_smp_rmb();
+    return r->cons.head - r->prod.tail;
+}
+
+static __rte_always_inline uint32_t gazelle_ring_count(const struct rte_ring *r)
+{
+    rte_smp_rmb();
+    return r->cons.head - r->cons.tail;
+}
+static __rte_always_inline uint32_t gazelle_ring_free_count(const struct rte_ring *r)
+{
+    return r->capacity - gazelle_ring_count(r);
+}
 #endif
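The block comment above describes the three-stage protocol these helpers implement on a per-socket ring: the owning thread publishes objects with gazelle_ring_sp_enqueue(), a second thread inspects them in place with gazelle_ring_read() and releases the slots with gazelle_ring_read_over(), and the owning thread later reclaims the read-over objects with gazelle_ring_sc_dequeue(), so allocation and free stay in one thread. A minimal usage sketch, not part of the patch; the helper names and the drain batch size of 32 are hypothetical:

    /* producer (stack) thread: publish pbufs; drop whatever the ring cannot take */
    static void publish_pbufs(struct rte_ring *ring, struct pbuf **pbufs, uint32_t n)
    {
        uint32_t ok = gazelle_ring_sp_enqueue(ring, (void **)pbufs, n);
        for (uint32_t i = ok; i < n; i++) {
            pbuf_free(pbufs[i]);
        }
    }

    /* reader (app) thread: copy data out, then mark the slots read-over */
    static void consume_pbufs(struct rte_ring *ring)
    {
        void *obj = NULL;
        while (gazelle_ring_read(ring, &obj, 1) == 1) {
            /* use ((struct pbuf *)obj)->payload here; the object stays queued */
        }
        gazelle_ring_read_over(ring);
    }

    /* producer thread again: reclaim read-over objects, freeing in the allocating thread */
    static void reclaim_pbufs(struct rte_ring *ring)
    {
        void *done[32];
        uint32_t n = gazelle_ring_sc_dequeue(ring, done, 32);
        for (uint32_t i = 0; i < n; i++) {
            pbuf_free((struct pbuf *)done[i]);
        }
    }

This is the pattern lstack_lwip.c below applies to sock->recv_ring and sock->send_ring, and it is why a single recv_ring can replace the old recv_ring plus recv_wait_free pair.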
diff --git a/src/common/gazelle_dfx_msg.h b/src/common/gazelle_dfx_msg.h
index 6db67ee..cf435cd 100644
--- a/src/common/gazelle_dfx_msg.h
+++ b/src/common/gazelle_dfx_msg.h
@@ -57,34 +57,37 @@ enum GAZELLE_LATENCY_TYPE {
     GAZELLE_LATENCY_READ,
 };
 
-struct gazelle_stat_pkts {
-    uint64_t tx;
-    uint64_t rx;
-    uint64_t tx_drop;
-    uint64_t rx_drop;
-    uint64_t rx_allocmbuf_fail;
-    uint64_t tx_allocmbuf_fail;
-    uint64_t call_msg_cnt;
-    uint16_t conn_num;
-    uint16_t send_idle_ring_cnt;
-    uint64_t event_list;
+struct gazelle_stack_stat {
+    uint64_t wakeup_events;
+    uint64_t write_lwip_cnt;
+    uint64_t send_self_rpc;
     uint64_t read_lwip_drop;
     uint64_t read_lwip_cnt;
-    uint64_t write_lwip_drop;
-    uint64_t write_lwip_cnt;
+    uint64_t rx_allocmbuf_fail;
+    uint64_t tx_allocmbuf_fail;
+    uint64_t call_null;
+    uint64_t rx_drop;
+    uint64_t rx;
+    uint64_t tx_drop;
+    uint64_t tx;
+};
+
+struct gazelle_wakeup_stat {
+    uint64_t app_events;
+    uint64_t app_write_idlefail;
     uint64_t app_write_cnt;
     uint64_t app_read_cnt;
-    uint64_t app_write_idlefail;
-    uint64_t app_write_drop;
-    uint64_t recv_list;
-    uint64_t wakeup_events;
-    uint64_t app_events;
-    uint64_t call_alloc_fail;
     uint64_t read_null;
-    uint64_t call_null;
-    uint64_t arp_copy_fail;
-    uint64_t send_self_rpc;
-    uint64_t send_list;
+};
+
+struct gazelle_stat_pkts {
+    uint64_t call_msg_cnt;
+    uint16_t conn_num;
+    uint64_t recv_list_cnt;
+    uint64_t call_alloc_fail;
+    uint64_t send_list_cnt;
+    struct gazelle_stack_stat stack_stat;
+    struct gazelle_wakeup_stat wakeup_stat;
 };
 
 /* same as define in lwip/stats.h - struct stats_mib2 */
@@ -159,6 +162,7 @@ struct gazelle_stat_lstack_conn_info {
     uint32_t recv_ring_cnt;
     uint32_t tcp_sub_state;
     int32_t sem_cnt;
+    int32_t fd;
 };
 
 struct gazelle_stat_lstack_conn {
diff --git a/src/lstack/api/lstack_epoll.c b/src/lstack/api/lstack_epoll.c
index cba67ea..4978f02 100644
--- a/src/lstack/api/lstack_epoll.c
+++ b/src/lstack/api/lstack_epoll.c
@@ -16,6 +16,7 @@
 #include <time.h>
 #include <poll.h>
 #include <stdatomic.h>
+#include <pthread.h>
 
 #include <lwip/lwipsock.h>
 #include <lwip/sockets.h>
@@ -39,36 +40,33 @@
 #define SEC_TO_NSEC 1000000000
 #define SEC_TO_MSEC 1000
 #define MSEC_TO_NSEC 1000000
-#define EPOLL_MAX_EVENTS 512
 #define POLL_KERNEL_EVENTS 32
 
-static PER_THREAD struct wakeup_poll g_wakeup_poll = {0};
-static bool g_use_epoll = false; /* FIXME: when no epoll close prepare event for performance testing */
-
 void add_epoll_event(struct netconn *conn, uint32_t event)
 {
     /* conn sock is never null, because lwip calls this func */
-    struct lwip_sock *sock = get_socket(conn->socket);
-
-    if ((event & sock->epoll_events) == 0) {
+    struct lwip_sock *sock = get_socket_by_fd(conn->socket);
+    if (sock->wakeup == NULL || (event & sock->epoll_events) == 0) {
         return;
     }
+    struct wakeup_poll *wakeup = sock->wakeup;
+    struct protocol_stack *stack = sock->stack;
 
-    sock->events |= event & sock->epoll_events;
-
-#ifdef GAZELLE_USE_EPOLL_EVENT_STACK
-    if (g_use_epoll && list_is_empty(&sock->event_list)) {
-        list_add_node(&sock->stack->event_list, &sock->event_list);
+    if (wakeup->type == WAKEUP_EPOLL) {
+        pthread_spin_lock(&wakeup->event_list_lock);
+        sock->events |= (event == EPOLLERR) ? (EPOLLIN | EPOLLERR) : (event & sock->epoll_events);
+        if (list_is_null(&sock->event_list)) {
+            list_add_node(&wakeup->event_list, &sock->event_list);
+        }
+        pthread_spin_unlock(&wakeup->event_list_lock);
     }
-#endif
 
-    if (sock->wakeup) {
-        sock->stack->stats.wakeup_events++;
-        if (get_protocol_stack_group()->wakeup_enable) {
-            rte_ring_sp_enqueue(sock->stack->wakeup_ring, &sock->wakeup->event_sem);
-        } else {
-            sem_post(&sock->wakeup->event_sem);
-        }
+    stack->stats.wakeup_events++;
+    sem_t *sem = &wakeup->event_sem;
+    if (get_protocol_stack_group()->wakeup_enable) {
+        gazelle_light_ring_enqueue_busrt(stack->wakeup_ring, (void **)&sem, 1);
+    } else {
+        sem_post(sem);
     }
 }
 
@@ -77,61 +75,34 @@ static inline uint32_t update_events(struct lwip_sock *sock)
     uint32_t event = 0;
 
     if (sock->epoll_events & EPOLLIN) {
-        if (sock->attach_fd > 0 && NETCONN_IS_ACCEPTIN(sock)) {
-            event |= EPOLLIN;
-        }
-
-        if (sock->attach_fd < 0 && NETCONN_IS_DATAIN(sock)) {
+        if (NETCONN_IS_DATAIN(sock) || NETCONN_IS_ACCEPTIN(sock)) {
            event |= EPOLLIN;
        }
    }
 
    if ((sock->epoll_events & EPOLLOUT) && NETCONN_IS_OUTIDLE(sock)) {
-        event |= EPOLLOUT;
+        /* lwip_netconn_do_connected sets LIBOS FLAGS when connected */
+        if (sock->conn && CONN_TYPE_IS_LIBOS(sock->conn)) {
+            event |= EPOLLOUT;
+        }
    }
 
-    if ((sock->epoll_events & EPOLLERR) && (sock->events & EPOLLERR)) {
+    if (sock->errevent > 0) {
        event |= EPOLLERR | EPOLLIN;
    }
 
    return event;
 }
 
-#ifdef GAZELLE_USE_EPOLL_EVENT_STACK
-void update_stack_events(struct protocol_stack *stack)
+static void raise_pending_events(struct wakeup_poll *wakeup, struct lwip_sock *sock)
 {
-    if (!g_use_epoll) {
-        return;
-    }
-
-    struct list_node *node, *temp;
-    list_for_each_safe(node, temp, &stack->event_list) {
-        struct lwip_sock *sock = container_of(node, struct lwip_sock, event_list);
-
-        sock->events = update_events(sock);
-        if (sock->events != 0) {
-            continue;
-        }
-
-        if (pthread_spin_trylock(&stack->event_lock)) {
-            continue;
+    sock->events = update_events(sock);
+    if (sock->events) {
+        pthread_spin_lock(&wakeup->event_list_lock);
+        if (list_is_null(&sock->event_list)) {
+            list_add_node(&wakeup->event_list, &sock->event_list);
        }
-        list_del_node_init(&sock->event_list);
-        pthread_spin_unlock(&stack->event_lock);
-    }
-}
-#endif
-
-static void raise_pending_events(struct lwip_sock *sock)
-{
-    struct lwip_sock *attach_sock = (sock->attach_fd > 0) ? get_socket_by_fd(sock->attach_fd) : sock;
-    if (attach_sock == NULL) {
-        return;
-    }
-
-    attach_sock->events = update_events(attach_sock);
-    if (attach_sock->events & attach_sock->epoll_events) {
-        rpc_call_addevent(attach_sock->stack, attach_sock);
+        pthread_spin_unlock(&wakeup->event_list_lock);
    }
 }
 
@@ -157,11 +128,15 @@ int32_t lstack_epoll_create(int32_t size)
     memset_s(wakeup, sizeof(struct wakeup_poll), 0, sizeof(struct wakeup_poll));
 
     init_list_node(&wakeup->event_list);
-    wakeup->epollfd = fd;
     sem_init(&wakeup->event_sem, 0, 0);
+    pthread_spin_init(&wakeup->event_list_lock, PTHREAD_PROCESS_PRIVATE);
+
+    wakeup->type = WAKEUP_EPOLL;
+    wakeup->epollfd = fd;
     sock->wakeup = wakeup;
 
-    g_use_epoll = true;
+    register_wakeup(wakeup);
+
     return fd;
 }
 
@@ -176,6 +151,9 @@ int32_t lstack_epoll_close(int32_t fd)
     }
 
     if (sock->wakeup) {
+        unregister_wakeup(sock->wakeup);
+        sem_destroy(&sock->wakeup->event_sem);
+        pthread_spin_destroy(&sock->wakeup->event_list_lock);
         free(sock->wakeup);
     }
     sock->wakeup = NULL;
@@ -236,161 +214,116 @@ int32_t lstack_epoll_ctl(int32_t epfd, int32_t op, int32_t fd, struct epoll_even
 
     struct lwip_sock *sock = get_socket(fd);
     if (sock == NULL) {
-        epoll_sock->wakeup->have_kernel_fd = true;
         return posix_api->epoll_ctl_fn(epfd, op, fd, event);
     }
 
     if (CONN_TYPE_HAS_HOST(sock->conn)) {
-        epoll_sock->wakeup->have_kernel_fd = true;
         int32_t ret = posix_api->epoll_ctl_fn(epfd, op, fd, event);
         if (ret < 0) {
-            return ret;
+            LSTACK_LOG(ERR, LSTACK, "fd=%d epfd=%d op=%d\n", fd, epfd, op);
        }
    }
 
+    struct wakeup_poll *wakeup = epoll_sock->wakeup;
    do {
        switch (op) {
        case EPOLL_CTL_ADD:
-            sock->wakeup = epoll_sock->wakeup;
-            if (sock->stack) {
-                epoll_sock->wakeup->stack_fd_cnt[sock->stack->queue_id]++;
-            }
-            if (list_is_empty(&sock->event_list)) {
-                list_add_node(&sock->wakeup->event_list, &sock->event_list);
-            }
+            sock->wakeup = wakeup;
+            wakeup->stack_fd_cnt[sock->stack->queue_id]++;
            /* fall through */
        case EPOLL_CTL_MOD:
            sock->epoll_events = event->events | EPOLLERR | EPOLLHUP;
            sock->ep_data = event->data;
-            if (sock->conn && NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP) {
-                raise_pending_events(sock);
-            }
+            raise_pending_events(wakeup, sock);
            break;
        case EPOLL_CTL_DEL:
-            list_del_node_init(&sock->event_list);
            sock->epoll_events = 0;
-            if (sock->stack) {
-                epoll_sock->wakeup->stack_fd_cnt[sock->stack->queue_id]--;
-            }
+            wakeup->stack_fd_cnt[sock->stack->queue_id]--;
+            pthread_spin_lock(&wakeup->event_list_lock);
+            list_del_node_null(&sock->event_list);
+            pthread_spin_unlock(&wakeup->event_list_lock);
            break;
        default:
            GAZELLE_RETURN(EINVAL);
        }
-        fd = sock->nextfd;
-        sock = get_socket(fd);
-    } while (fd > 0 && sock != NULL);
+        sock = sock->listen_next;
+    } while (sock != NULL);
 
-    update_epoll_max_stack(epoll_sock->wakeup);
+    update_epoll_max_stack(wakeup);
    return 0;
 }
 
-#ifdef GAZELLE_USE_EPOLL_EVENT_STACK
-static int32_t epoll_lwip_event(struct wakeup_poll *wakeup, struct epoll_event *events, uint32_t maxevents)
+static void del_node_array(struct epoll_event *events, int32_t event_num, int32_t del_index)
 {
-    int32_t event_num = 0;
-    struct protocol_stack_group *stack_group = get_protocol_stack_group();
-
-    maxevents = LWIP_MIN(EPOLL_MAX_EVENTS, maxevents);
-    for (uint32_t i = 0; i < stack_group->stack_num && event_num < maxevents; i++) {
-        struct protocol_stack *stack = stack_group->stacks[i];
-        int32_t start_event_num = event_num;
-
-        if (pthread_spin_trylock(&stack->event_lock)) {
-            continue;
-        }
-
-        struct list_node *node, *temp;
-        list_for_each_safe(node, temp, &stack->event_list) {
-            struct lwip_sock *sock = container_of(node, struct lwip_sock, event_list);
-
-            uint32_t event = sock->events & sock->epoll_events;
-            if (event == 0 || sock->wait_close) {
-                continue;
-            }
-
-            events[event_num].events = event;
-            events[event_num].data = sock->ep_data;
-            event_num++;
+    for (int32_t i = del_index; i + 1 < event_num; i++) {
+        events[i] = events[i + 1];
+    }
+}
 
-            if (event_num >= maxevents) {
-                break;
+static int32_t del_duplicate_event(struct epoll_event *events, int32_t event_num)
+{
+    for (int32_t i = 0; i < event_num; i++) {
+        for (int32_t j = i + 1; j < event_num; j++) {
+            if (events[i].data.u64 == events[j].data.u64) {
+                del_node_array(events, event_num, j);
+                event_num--;
            }
        }
-
-        pthread_spin_unlock(&stack->event_lock);
-
-        __sync_fetch_and_add(&stack->stats.app_events, event_num - start_event_num);
    }
 
    return event_num;
 }
-#else
+
 static int32_t epoll_lwip_event(struct wakeup_poll *wakeup, struct epoll_event *events, uint32_t maxevents)
 {
    int32_t event_num = 0;
    struct list_node *node, *temp;
+    int32_t accept_num = 0;
+
+    pthread_spin_lock(&wakeup->event_list_lock);
+
    list_for_each_safe(node, temp, &wakeup->event_list) {
        struct lwip_sock *sock = container_of(node, struct lwip_sock, event_list);
-        if (sock->conn == NULL) {
-            list_del_node_init(&sock->event_list);
-            continue;
+        if (sock->conn && sock->conn->acceptmbox) {
+            accept_num++;
        }
 
-        struct lwip_sock *temp_sock = sock;
-        do {
-            struct lwip_sock *attach_sock = (temp_sock->attach_fd > 0) ? get_socket(temp_sock->attach_fd) : temp_sock;
-            if (attach_sock == NULL || temp_sock->wait_close) {
-                temp_sock = (temp_sock->nextfd > 0) ? get_socket(temp_sock->nextfd) : NULL;
-                continue;
-            }
+        events[event_num].events = sock->events;
+        events[event_num].data = sock->ep_data;
+        event_num++;
 
-            uint32_t event = update_events(attach_sock);
-            if (event != 0) {
-                events[event_num].events = event;
-                events[event_num].data = temp_sock->ep_data;
-                event_num++;
-                if (event_num >= maxevents) {
-                    break;
-                }
-            }
+        if (event_num >= maxevents) {
+            break;
+        }
+    }
 
-            temp_sock = (temp_sock->nextfd > 0) ? get_socket(temp_sock->nextfd) : NULL;
-        } while (temp_sock);
+    pthread_spin_unlock(&wakeup->event_list_lock);
+
+    if (accept_num > 1) {
+        event_num = del_duplicate_event(events, event_num);
    }
 
+    // atomic_fetch_add(&wakeup->bind_stack->stats.app_events, event_num);
    return event_num;
 }
-#endif
 
 static int32_t poll_lwip_event(struct pollfd *fds, nfds_t nfds)
 {
    int32_t event_num = 0;
 
    for (uint32_t i = 0; i < nfds; i++) {
-        /* listenfd nextfd pointerto next stack listen, others nextfd=-1 */
+        /* sock->listen_next points to the next stack's listener */
        int32_t fd = fds[i].fd;
-        while (fd > 0) {
-            struct lwip_sock *sock = get_socket(fd);
-            if (sock == NULL) {
-                break;
-            }
-
-            /* attach listen is empty, all event in attached listen. attached listen attach_fd is self */
-            struct lwip_sock *attach_sock = (sock->attach_fd > 0) ? get_socket(sock->attach_fd) : sock;
-            if (attach_sock == NULL || sock->wait_close) {
-                fd = sock->nextfd;
-                continue;
-            }
-
-            uint32_t events = update_events(attach_sock);
+        struct lwip_sock *sock = get_socket_by_fd(fd);
+        while (sock && sock->conn) {
+            uint32_t events = update_events(sock);
            if (events) {
                fds[i].revents = events;
-                __sync_fetch_and_add(&sock->stack->stats.app_events, 1);
                event_num++;
                break;
            }
 
-            fd = sock->nextfd;
+            sock = sock->listen_next;
        }
    }
 
@@ -417,7 +350,7 @@ static void change_epollfd_kernel_thread(struct wakeup_poll *wakeup, struct prot
 
     /* avoid kernel thread post too much, use EPOLLET */
     struct epoll_event event;
-    event.data.ptr = &wakeup->event_sem;
+    event.data.ptr = wakeup;
     event.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET;
     if (posix_api->epoll_ctl_fn(new_stack->epollfd, EPOLL_CTL_ADD, wakeup->epollfd, &event) != 0) {
         LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno);
@@ -457,15 +390,18 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe
 
     do {
         event_num += epoll_lwip_event(sock->wakeup, &events[event_num], maxevents - event_num);
+        sock->wakeup->stat.app_events += event_num;
 
-        if (sock->wakeup->have_kernel_fd) {
+        if (__atomic_load_n(&sock->wakeup->have_kernel_event, __ATOMIC_RELAXED)) {
            event_num += posix_api->epoll_wait_fn(epfd, &events[event_num], maxevents - event_num, 0);
        }
 
        if (event_num > 0) {
+            while (sem_trywait(&sock->wakeup->event_sem) == 0);
            break;
        }
 
+        sock->wakeup->have_kernel_event = false;
        if (timeout < 0) {
            ret = sem_wait(&sock->wakeup->event_sem);
        } else {
@@ -479,6 +415,7 @@ int32_t lstack_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxe
 static void init_poll_wakeup_data(struct wakeup_poll *wakeup)
 {
     sem_init(&wakeup->event_sem, 0, 0);
+    wakeup->type = WAKEUP_POLL;
 
     wakeup->last_fds = calloc(POLL_KERNEL_EVENTS, sizeof(struct pollfd));
     if (wakeup->last_fds == NULL) {
@@ -542,11 +479,6 @@ static void update_kernel_poll(struct wakeup_poll *wakeup, uint32_t index, struc
     if (posix_api->epoll_ctl_fn(wakeup->epollfd, EPOLL_CTL_ADD, new_fd->fd, &event) != 0) {
         LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno);
     }
-
-    wakeup->last_fds[index].fd = new_fd->fd;
-    wakeup->last_fds[index].events = new_fd->events;
-
-    wakeup->have_kernel_fd = true;
 }
 
 static void poll_init(struct wakeup_poll *wakeup, struct pollfd *fds, nfds_t nfds)
@@ -554,17 +486,17 @@ static void poll_init(struct wakeup_poll *wakeup, struct pollfd *fds, nfds_t nfd
     if (!wakeup->init) {
         wakeup->init = true;
         init_poll_wakeup_data(wakeup);
-    } else {
-        while (sem_trywait(&wakeup->event_sem) == 0) {}
-    }
-
-    if (nfds > wakeup->last_max_nfds) {
-        resize_kernel_poll(wakeup, nfds);
+        register_wakeup(wakeup);
    }
 
    int32_t stack_count[PROTOCOL_STACK_MAX] = {0};
    int32_t poll_change = 0;
 
+    /* poll fds num grew, recalloc fds size */
+    if (nfds > wakeup->last_max_nfds) {
+        resize_kernel_poll(wakeup, nfds);
+        poll_change = 1;
+    }
    /* poll fds num less, del old fd */
    for (uint32_t i = nfds; i < wakeup->last_nfds; i++) {
        update_kernel_poll(wakeup, i, NULL);
@@ -572,44 +504,51 @@
    }
 
    for (uint32_t i = 0; i < nfds; i++) {
+        int32_t fd = fds[i].fd;
        fds[i].revents = 0;
+        struct lwip_sock *sock = get_socket_by_fd(fd);
 
-        if (fds[i].fd == wakeup->last_fds[i].fd && fds[i].events == wakeup->last_fds[i].events) {
-            continue;
+        if (fd == wakeup->last_fds[i].fd && fds[i].events == wakeup->last_fds[i].events) {
+            /* after an fd closes, a new socket may get the same fd. */
+            if (sock == NULL || sock->wakeup != NULL) {
+                continue;
+            }
        }
+        wakeup->last_fds[i].fd = fd;
+        wakeup->last_fds[i].events = fds[i].events;
        poll_change = 1;
 
-        int32_t fd = fds[i].fd;
-        struct lwip_sock *sock = get_socket(fd);
-        if (sock == NULL || CONN_TYPE_HAS_HOST(sock->conn)) {
+        if (sock == NULL || sock->conn == NULL || CONN_TYPE_HAS_HOST(sock->conn)) {
            update_kernel_poll(wakeup, i, fds + i);
        }
 
-        do {
-            sock = get_socket(fd);
-            if (sock == NULL || sock->conn == NULL) {
-                break;
+        while (sock && sock->conn) {
+            if (sock->epoll_events != (fds[i].events | POLLERR)) {
+                sock->epoll_events = fds[i].events | POLLERR;
+            }
+            if (sock->wakeup != wakeup) {
+                sock->wakeup = wakeup;
            }
-            sock->epoll_events = fds[i].events | POLLERR;
-            sock->wakeup = wakeup;
 
-            /* listenfd list */
-            fd = sock->nextfd;
            stack_count[sock->stack->queue_id]++;
-        } while (fd > 0);
+            /* listenfd list */
+            sock = sock->listen_next;
+        }
    }
 
-    wakeup->last_nfds = nfds;
    if (poll_change == 0) {
        return;
    }
+    wakeup->last_nfds = nfds;
 
    poll_bind_statck(wakeup, stack_count);
 }
 
 int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout)
 {
-    poll_init(&g_wakeup_poll, fds, nfds);
+    static PER_THREAD struct wakeup_poll wakeup_poll = {0};
+
+    poll_init(&wakeup_poll, fds, nfds);
 
    int32_t event_num = 0;
    int32_t ret;
@@ -624,23 +563,25 @@ int32_t lstack_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout)
     event_num += poll_lwip_event(fds, nfds);
 
     /* reduce syscall epoll_wait */
-    if (g_wakeup_poll.have_kernel_fd) {
-        int32_t kernel_num = posix_api->epoll_wait_fn(g_wakeup_poll.epollfd, g_wakeup_poll.events, nfds, 0);
+    if (__atomic_load_n(&wakeup_poll.have_kernel_event, __ATOMIC_RELAXED)) {
+        int32_t kernel_num = posix_api->epoll_wait_fn(wakeup_poll.epollfd, wakeup_poll.events, nfds, 0);
        for (int32_t i = 0; i < kernel_num; i++) {
-            uint32_t index = g_wakeup_poll.events[i].data.u32;
-            fds[index].revents = g_wakeup_poll.events[i].events;
+            uint32_t index = wakeup_poll.events[i].data.u32;
+            fds[index].revents = wakeup_poll.events[i].events;
        }
        event_num += kernel_num >= 0 ? kernel_num : 0;
    }
 
    if (event_num > 0) {
+        while (sem_trywait(&wakeup_poll.event_sem) == 0);
        break;
    }
 
+    wakeup_poll.have_kernel_event = false;
    if (timeout < 0) {
-        ret = sem_wait(&g_wakeup_poll.event_sem);
+        ret = sem_wait(&wakeup_poll.event_sem);
    } else {
-        ret = sem_timedwait(&g_wakeup_poll.event_sem, &poll_time);
+        ret = sem_timedwait(&wakeup_poll.event_sem, &poll_time);
    }
 } while (ret == 0);

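Both wait paths above rely on one wakeup semaphore per poller: the stack thread posts event_sem (or enqueues it on wakeup_ring) for every readiness notification, and the waiter drains any surplus posts with sem_trywait once it has events to return, so stale posts cannot make the next wait return immediately. A condensed sketch of that consumer pattern, not part of the patch; collect_ready_events() is a hypothetical stand-in for the lwip-plus-kernel collection done above:

    static int32_t wait_for_events(struct wakeup_poll *wakeup, const struct timespec *deadline)
    {
        int32_t num;

        do {
            num = collect_ready_events();   /* hypothetical: epoll_lwip_event()/poll_lwip_event() plus kernel fds */
            if (num > 0) {
                /* drain surplus posts so the next wait does not wake spuriously */
                while (sem_trywait(&wakeup->event_sem) == 0) {}
                return num;
            }
        } while ((deadline ? sem_timedwait(&wakeup->event_sem, deadline)
                           : sem_wait(&wakeup->event_sem)) == 0);

        return 0;   /* timeout or interrupted */
    }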
diff --git a/src/lstack/api/lstack_wrap.c b/src/lstack/api/lstack_wrap.c
index bf5dcb4..ec68d62 100644
--- a/src/lstack/api/lstack_wrap.c
+++ b/src/lstack/api/lstack_wrap.c
@@ -52,7 +52,7 @@ static inline enum KERNEL_LWIP_PATH select_path(int fd)
         return PATH_KERNEL;
     }
 
-    if (unlikely(posix_api->is_chld)) {
+    if (unlikely(posix_api->ues_posix)) {
         return PATH_KERNEL;
     }
 
@@ -84,7 +84,7 @@ static inline int32_t do_epoll_create(int32_t size)
         return posix_api->epoll_create_fn(size);
     }
 
-    if (unlikely(posix_api->is_chld)) {
+    if (unlikely(posix_api->ues_posix)) {
         return posix_api->epoll_create_fn(size);
     }
 
@@ -93,7 +93,7 @@ static inline int32_t do_epoll_create(int32_t size)
 
 static inline int32_t do_epoll_ctl(int32_t epfd, int32_t op, int32_t fd, struct epoll_event* event)
 {
-    if (unlikely(posix_api->is_chld)) {
+    if (unlikely(posix_api->ues_posix)) {
         return posix_api->epoll_ctl_fn(epfd, op, fd, event);
     }
 
@@ -102,7 +102,7 @@ static inline int32_t do_epoll_ctl(int32_t epfd, int32_t op, int32_t fd, struct
 
 static inline int32_t do_epoll_wait(int32_t epfd, struct epoll_event* events, int32_t maxevents, int32_t timeout)
 {
-    if (unlikely(posix_api->is_chld)) {
+    if (unlikely(posix_api->ues_posix)) {
         return posix_api->epoll_wait_fn(epfd, events, maxevents, timeout);
     }
 
@@ -203,7 +203,8 @@ static inline int32_t do_listen(int32_t s, int32_t backlog)
         return posix_api->listen_fn(s, backlog);
     }
 
-    int32_t ret = stack_broadcast_listen(s, backlog);
+    int32_t ret = get_global_cfg_params()->listen_shadow ? stack_broadcast_listen(s, backlog) :
+        stack_single_listen(s, backlog);
     if (ret != 0) {
         return ret;
     }
@@ -264,7 +265,7 @@ static inline int32_t do_setsockopt(int32_t s, int32_t level, int32_t optname, c
 static inline int32_t do_socket(int32_t domain, int32_t type, int32_t protocol)
 {
     if ((domain != AF_INET && domain != AF_UNSPEC)
-        || posix_api->is_chld) {
+        || posix_api->ues_posix) {
         return posix_api->socket_fn(domain, type, protocol);
     }
 
@@ -368,7 +369,7 @@ static int32_t do_poll(struct pollfd *fds, nfds_t nfds, int32_t timeout)
         GAZELLE_RETURN(EINVAL);
     }
 
-    if (unlikely(posix_api->is_chld) || nfds == 0) {
+    if (unlikely(posix_api->ues_posix) || nfds == 0) {
         return posix_api->poll_fn(fds, nfds, timeout);
     }
 
diff --git a/src/lstack/core/lstack_cfg.c b/src/lstack/core/lstack_cfg.c
index 13086a3..ca2b979 100644
--- a/src/lstack/core/lstack_cfg.c
+++ b/src/lstack/core/lstack_cfg.c
@@ -56,6 +56,7 @@ static int32_t parse_devices(void);
 static int32_t parse_dpdk_args(void);
 static int32_t parse_gateway_addr(void);
 static int32_t parse_kni_switch(void);
+static int32_t parse_listen_shadow(void);
 
 struct config_vector_t {
     const char *name;
@@ -73,6 +74,7 @@ static struct config_vector_t g_config_tbl[] = {
     { "num_wakeup", parse_wakeup_cpu_number },
     { "low_power_mode", parse_low_power_mode },
     { "kni_switch", parse_kni_switch },
+    { "listen_shadow", parse_listen_shadow },
     { NULL, NULL }
 };
 
@@ -670,6 +672,22 @@ static int32_t parse_use_ltran(void)
     return 0;
 }
 
+static int32_t parse_listen_shadow(void)
+{
+    const config_setting_t *arg = NULL;
+
+    arg = config_lookup(&g_config, "listen_shadow");
+    if (arg == NULL) {
+        g_config_params.listen_shadow = false;
+        return 0;
+    }
+
+    int32_t val = config_setting_get_int(arg);
+    g_config_params.listen_shadow = (val == 0) ? false : true;
+
+    return 0;
+}
+
 static int32_t parse_kni_switch(void)
 {
     const config_setting_t *arg = NULL;
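parse_listen_shadow() reads the option as a boolean integer and defaults to off when the key is absent; do_listen() in lstack_wrap.c then picks stack_broadcast_listen() (one shadow listener per stack thread) or stack_single_listen(). A hypothetical lstack.conf fragment enabling it — the patch itself adds one such line to src/lstack/lstack.conf:

    listen_shadow=1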
diff --git a/src/lstack/core/lstack_control_plane.c b/src/lstack/core/lstack_control_plane.c
index 26a1b1c..ef38fb5 100644
--- a/src/lstack/core/lstack_control_plane.c
+++ b/src/lstack/core/lstack_control_plane.c
@@ -713,7 +713,7 @@ void control_server_thread(void *arg)
     struct epoll_event evt_array;
     while (1) {
         /* wait init finish */
-        if (posix_api->is_chld) {
+        if (posix_api->ues_posix) {
             usleep(GAZELLE_10MS);
             continue;
         }
@@ -759,7 +759,7 @@ void control_client_thread(void *arg)
 
     while (1) {
         /* wait init finish */
-        if (posix_api->is_chld) {
+        if (posix_api->ues_posix) {
             usleep(GAZELLE_10MS);
             continue;
         }
diff --git a/src/lstack/core/lstack_dpdk.c b/src/lstack/core/lstack_dpdk.c
index df0332b..6675d7b 100644
--- a/src/lstack/core/lstack_dpdk.c
+++ b/src/lstack/core/lstack_dpdk.c
@@ -129,26 +129,6 @@ static struct rte_mempool *create_pktmbuf_mempool(const char *name, uint32_t nb_
     return pool;
 }
 
-struct rte_mempool *create_rpc_mempool(const char *name, uint16_t queue_id)
-{
-    char pool_name[PATH_MAX];
-    struct rte_mempool *pool;
-    int32_t ret;
-
-    ret = snprintf_s(pool_name, sizeof(pool_name), PATH_MAX - 1, "%s_%hu", name, queue_id);
-    if (ret < 0) {
-        return NULL;
-    }
-
-    pool = rte_mempool_create(pool_name, CALL_POOL_SZ, sizeof(struct rpc_msg), 0, 0, NULL, NULL, NULL,
-        NULL, rte_socket_id(), 0);
-    if (pool == NULL) {
-        LSTACK_LOG(ERR, LSTACK, "cannot create %s pool rte_err=%d\n", pool_name, rte_errno);
-    }
-
-    return pool;
-}
-
 static struct reg_ring_msg *create_reg_mempool(const char *name, uint16_t queue_id)
 {
     int ret;
@@ -175,13 +155,13 @@ int32_t pktmbuf_pool_init(struct protocol_stack *stack, uint16_t stack_num)
         return -1;
     }
 
-    stack->rx_pktmbuf_pool = create_pktmbuf_mempool("rx_mbuf", RX_NB_MBUF, RX_MBUF_CACHE_SZ,
+    stack->rx_pktmbuf_pool = create_pktmbuf_mempool("rx_mbuf", RX_NB_MBUF / stack_num, RX_MBUF_CACHE_SZ,
         stack->queue_id);
     if (stack->rx_pktmbuf_pool == NULL) {
         return -1;
     }
 
-    stack->tx_pktmbuf_pool = create_pktmbuf_mempool("tx_mbuf", TX_NB_MBUF, TX_MBUF_CACHE_SZ,
+    stack->tx_pktmbuf_pool = create_pktmbuf_mempool("tx_mbuf", TX_NB_MBUF / stack_num, TX_MBUF_CACHE_SZ,
         stack->queue_id);
     if (stack->tx_pktmbuf_pool == NULL) {
         return -1;
@@ -220,12 +200,14 @@ int32_t create_shared_ring(struct protocol_stack *stack)
     lockless_queue_init(&stack->rpc_queue);
 
     if (get_protocol_stack_group()->wakeup_enable) {
-        stack->wakeup_ring = create_ring("WAKEUP_RING", VDEV_WAKEUP_QUEUE_SZ, 0, stack->queue_id);
+        stack->wakeup_ring = create_ring("WAKEUP_RING", VDEV_WAKEUP_QUEUE_SZ, RING_F_SP_ENQ | RING_F_SC_DEQ,
+            stack->queue_id);
         if (stack->wakeup_ring == NULL) {
             return -1;
         }
     }
 
+
     if (use_ltran()) {
         stack->rx_ring = create_ring("RING_RX", VDEV_RX_QUEUE_SZ, RING_F_SP_ENQ | RING_F_SC_DEQ, stack->queue_id);
         if (stack->rx_ring == NULL) {
@@ -255,7 +237,7 @@ int32_t fill_mbuf_to_ring(struct rte_mempool *mempool, struct rte_ring *ring, ui
     struct rte_mbuf *free_buf[FREE_RX_QUEUE_SZ];
 
     while (remain > 0) {
-        batch = LWIP_MIN(remain, FREE_RX_QUEUE_SZ);
+        batch = LWIP_MIN(remain, RING_SIZE(FREE_RX_QUEUE_SZ));
 
         ret = gazelle_alloc_pktmbuf(mempool, free_buf, batch);
         if (ret != 0) {
@@ -263,7 +245,7 @@ int32_t fill_mbuf_to_ring(struct rte_mempool *mempool, struct rte_ring *ring, ui
             return -1;
         }
 
-        ret = rte_ring_en_enqueue_bulk(ring, (void **)free_buf, batch);
+        ret = gazelle_ring_sp_enqueue(ring, (void **)free_buf, batch);
         if (ret == 0) {
             LSTACK_LOG(ERR, LSTACK, "cannot enqueue to ring, count: %d\n", (int32_t)batch);
             return -1;
diff --git a/src/lstack/core/lstack_init.c b/src/lstack/core/lstack_init.c
index f8e96bf..78040b0 100644
--- a/src/lstack/core/lstack_init.c
+++ b/src/lstack/core/lstack_init.c
@@ -143,7 +143,7 @@ static int32_t check_preload_bind_proc(void)
 
 __attribute__((destructor)) void gazelle_network_exit(void)
 {
-    if (posix_api != NULL && !posix_api->is_chld) {
+    if (posix_api != NULL && !posix_api->ues_posix) {
         lwip_exit();
     }
 
@@ -275,7 +275,6 @@ __attribute__((constructor)) void gazelle_network_init(void)
         LSTACK_EXIT(1, "stack thread or kernel_event thread failed\n");
     }
 
-    posix_api->is_chld = 0;
+    posix_api->ues_posix = 0;
     LSTACK_LOG(INFO, LSTACK, "gazelle_network_init success\n");
-    rte_smp_mb();
 }
diff --git a/src/lstack/core/lstack_lwip.c b/src/lstack/core/lstack_lwip.c
|
|
index 8544ef7..156fc1f 100644
|
|
--- a/src/lstack/core/lstack_lwip.c
|
|
+++ b/src/lstack/core/lstack_lwip.c
|
|
@@ -35,33 +35,24 @@
|
|
#define HALF_DIVISOR (2)
|
|
#define USED_IDLE_WATERMARK (VDEV_IDLE_QUEUE_SZ >> 2)
|
|
|
|
-void listen_list_add_node(int32_t head_fd, int32_t add_fd)
|
|
-{
|
|
- struct lwip_sock *sock = NULL;
|
|
- int32_t fd = head_fd;
|
|
-
|
|
- while (fd > 0) {
|
|
- sock = get_socket(fd);
|
|
- if (sock == NULL) {
|
|
- LSTACK_LOG(ERR, LSTACK, "tid %ld, %d get sock null\n", get_stack_tid(), fd);
|
|
- return;
|
|
- }
|
|
- fd = sock->nextfd;
|
|
- }
|
|
- sock->nextfd = add_fd;
|
|
-}
|
|
+static int32_t lwip_alloc_pbufs(pbuf_layer layer, uint16_t length, pbuf_type type, void **pbufs, uint32_t num);
|
|
|
|
static void free_ring_pbuf(struct rte_ring *ring)
|
|
{
|
|
- while (1) {
|
|
- struct pbuf *pbuf = NULL;
|
|
- int32_t ret = rte_ring_sc_dequeue(ring, (void **)&pbuf);
|
|
- if (ret != 0) {
|
|
- break;
|
|
- }
|
|
+ void *pbufs[SOCK_RECV_RING_SIZE];
|
|
|
|
- pbuf_free(pbuf);
|
|
- }
|
|
+ do {
|
|
+ gazelle_ring_read(ring, pbufs, RING_SIZE(SOCK_RECV_RING_SIZE));
|
|
+ gazelle_ring_read_over(ring);
|
|
+ } while (gazelle_ring_readable_count(ring));
|
|
+
|
|
+ do {
|
|
+ uint32_t num = gazelle_ring_sc_dequeue(ring, pbufs, RING_SIZE(SOCK_RECV_RING_SIZE));
|
|
+
|
|
+ for (uint32_t i = 0; i < num; i++) {
|
|
+ pbuf_free(pbufs[i]);
|
|
+ }
|
|
+ } while (gazelle_ring_readover_count(ring));
|
|
}
|
|
|
|
static void reset_sock_data(struct lwip_sock *sock)
|
|
@@ -73,11 +64,6 @@ static void reset_sock_data(struct lwip_sock *sock)
|
|
}
|
|
sock->recv_ring = NULL;
|
|
|
|
- if (sock->recv_wait_free) {
|
|
- free_ring_pbuf(sock->recv_wait_free);
|
|
- rte_ring_free(sock->recv_wait_free);
|
|
- }
|
|
- sock->recv_wait_free = NULL;
|
|
|
|
if (sock->send_ring) {
|
|
free_ring_pbuf(sock->send_ring);
|
|
@@ -85,19 +71,11 @@ static void reset_sock_data(struct lwip_sock *sock)
|
|
}
|
|
sock->send_ring = NULL;
|
|
|
|
- if (sock->send_idle_ring) {
|
|
- free_ring_pbuf(sock->send_idle_ring);
|
|
- rte_ring_free(sock->send_idle_ring);
|
|
- }
|
|
- sock->send_idle_ring = NULL;
|
|
|
|
sock->stack = NULL;
|
|
sock->wakeup = NULL;
|
|
- sock->events = 0;
|
|
- sock->nextfd = -1;
|
|
- sock->attach_fd = -1;
|
|
+ sock->listen_next = NULL;
|
|
sock->wait_close = false;
|
|
- sock->shadowed_sock = NULL;
|
|
sock->epoll_events = 0;
|
|
sock->events = 0;
|
|
|
|
@@ -105,34 +83,29 @@ static void reset_sock_data(struct lwip_sock *sock)
|
|
pbuf_free(sock->recv_lastdata);
|
|
}
|
|
sock->recv_lastdata = NULL;
|
|
-
|
|
- if (sock->send_lastdata) {
|
|
- pbuf_free(sock->send_lastdata);
|
|
- }
|
|
- sock->send_lastdata = NULL;
|
|
}
|
|
|
|
static void replenish_send_idlembuf(struct rte_ring *ring)
|
|
{
|
|
- uint32_t replenish_cnt = rte_ring_free_count(ring);
|
|
+ void *pbuf[SOCK_SEND_RING_SIZE];
|
|
|
|
- for (uint32_t i = 0; i < replenish_cnt; i++) {
|
|
- struct pbuf *pbuf = lwip_alloc_pbuf(PBUF_TRANSPORT, TCP_MSS, PBUF_RAM);
|
|
- if (pbuf == NULL) {
|
|
- break;
|
|
- }
|
|
+ uint32_t replenish_cnt = gazelle_ring_free_count(ring);
|
|
+ uint32_t alloc_num = LWIP_MIN(replenish_cnt, RING_SIZE(SOCK_SEND_RING_SIZE));
|
|
|
|
- int32_t ret = rte_ring_sp_enqueue(ring, (void *)pbuf);
|
|
- if (ret < 0) {
|
|
- pbuf_free(pbuf);
|
|
- break;
|
|
- }
|
|
+ if (lwip_alloc_pbufs(PBUF_TRANSPORT, TCP_MSS, PBUF_RAM, (void **)pbuf, alloc_num) != 0) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ uint32_t num = gazelle_ring_sp_enqueue(ring, pbuf, alloc_num);
|
|
+ for (uint32_t i = num; i < alloc_num; i++) {
|
|
+ pbuf_free(pbuf[i]);
|
|
}
|
|
}
|
|
|
|
void gazelle_init_sock(int32_t fd)
|
|
{
|
|
static uint32_t name_tick = 0;
|
|
+ struct protocol_stack *stack = get_protocol_stack();
|
|
struct lwip_sock *sock = get_socket(fd);
|
|
if (sock == NULL) {
|
|
return;
|
|
@@ -140,38 +113,26 @@ void gazelle_init_sock(int32_t fd)
|
|
|
|
reset_sock_data(sock);
|
|
|
|
- sock->recv_ring = create_ring("sock_recv", SOCK_RECV_RING_SIZE, 0, atomic_fetch_add(&name_tick, 1));
|
|
+ sock->recv_ring = create_ring("sock_recv", SOCK_RECV_RING_SIZE, RING_F_SP_ENQ | RING_F_SC_DEQ,
|
|
+ atomic_fetch_add(&name_tick, 1));
|
|
if (sock->recv_ring == NULL) {
|
|
LSTACK_LOG(ERR, LSTACK, "sock_recv create failed. errno: %d.\n", rte_errno);
|
|
return;
|
|
}
|
|
|
|
- sock->recv_wait_free = create_ring("wait_free", SOCK_RECV_RING_SIZE, 0, atomic_fetch_add(&name_tick, 1));
|
|
- if (sock->recv_wait_free == NULL) {
|
|
- LSTACK_LOG(ERR, LSTACK, "wait_free create failed. errno: %d.\n", rte_errno);
|
|
- return;
|
|
- }
|
|
-
|
|
- sock->send_ring = create_ring("sock_send", SOCK_SEND_RING_SIZE, 0, atomic_fetch_add(&name_tick, 1));
|
|
+ sock->send_ring = create_ring("sock_send", SOCK_SEND_RING_SIZE, RING_F_SP_ENQ | RING_F_SC_DEQ,
|
|
+ atomic_fetch_add(&name_tick, 1));
|
|
if (sock->send_ring == NULL) {
|
|
LSTACK_LOG(ERR, LSTACK, "sock_send create failed. errno: %d.\n", rte_errno);
|
|
return;
|
|
}
|
|
+ replenish_send_idlembuf(sock->send_ring);
|
|
|
|
- sock->send_idle_ring = create_ring("idle_send", SOCK_SEND_RING_SIZE, 0, atomic_fetch_add(&name_tick, 1));
|
|
- if (sock->send_idle_ring == NULL) {
|
|
- LSTACK_LOG(ERR, LSTACK, "idle_send create failed. errno: %d.\n", rte_errno);
|
|
- return;
|
|
- }
|
|
- replenish_send_idlembuf(sock->send_idle_ring);
|
|
-
|
|
- sock->stack = get_protocol_stack();
|
|
+ sock->stack = stack;
|
|
sock->stack->conn_num++;
|
|
- init_list_node(&sock->recv_list);
|
|
- init_list_node(&sock->attach_list);
|
|
- init_list_node(&sock->listen_list);
|
|
- init_list_node(&sock->event_list);
|
|
- init_list_node(&sock->send_list);
|
|
+ init_list_node_null(&sock->recv_list);
|
|
+ init_list_node_null(&sock->event_list);
|
|
+ init_list_node_null(&sock->send_list);
|
|
}
|
|
|
|
void gazelle_clean_sock(int32_t fd)
|
|
@@ -181,17 +142,18 @@ void gazelle_clean_sock(int32_t fd)
|
|
return;
|
|
}
|
|
|
|
+ if (sock->wakeup && sock->wakeup->type == WAKEUP_EPOLL) {
|
|
+ pthread_spin_lock(&sock->wakeup->event_list_lock);
|
|
+ list_del_node_null(&sock->event_list);
|
|
+ pthread_spin_unlock(&sock->wakeup->event_list_lock);
|
|
+ }
|
|
+
|
|
sock->stack->conn_num--;
|
|
|
|
reset_sock_data(sock);
|
|
|
|
- list_del_node_init(&sock->recv_list);
|
|
- list_del_node_init(&sock->attach_list);
|
|
- list_del_node_init(&sock->listen_list);
|
|
-#ifdef GAZELLE_USE_EPOLL_EVENT_STACK
|
|
- list_del_node_init(&sock->event_list);
|
|
-#endif
|
|
- list_del_node_init(&sock->send_list);
|
|
+ list_del_node_null(&sock->recv_list);
|
|
+ list_del_node_null(&sock->send_list);
|
|
}
|
|
|
|
void gazelle_free_pbuf(struct pbuf *pbuf)
|
|
@@ -201,45 +163,14 @@ void gazelle_free_pbuf(struct pbuf *pbuf)
|
|
}
|
|
|
|
struct rte_mbuf *mbuf = pbuf_to_mbuf(pbuf);
|
|
- if (mbuf->pool != NULL) {
|
|
- rte_pktmbuf_free(mbuf);
|
|
- } else {
|
|
- rte_free(mbuf);
|
|
- }
|
|
-}
|
|
-
|
|
-static int32_t alloc_mbufs(struct rte_mempool *pool, struct rte_mbuf **mbufs, uint32_t num)
|
|
-{
|
|
- // alloc mbuf from pool
|
|
- if (rte_pktmbuf_alloc_bulk(pool, mbufs, num) == 0) {
|
|
- return 0;
|
|
- }
|
|
-
|
|
- // alloc mbuf from system
|
|
- for (uint32_t i = 0; i < num; i++) {
|
|
- struct rte_mbuf *mbuf = (struct rte_mbuf *)rte_malloc(NULL, pool->elt_size, sizeof(uint64_t));
|
|
- if (mbuf == NULL) {
|
|
- for (uint32_t j = 0; j < i; j++) {
|
|
- rte_free(mbufs[j]);
|
|
- mbufs[j] = NULL;
|
|
- }
|
|
- return -1;
|
|
- }
|
|
-
|
|
- mbufs[i] = mbuf;
|
|
- rte_pktmbuf_init(pool, NULL, mbuf, 0);
|
|
- rte_pktmbuf_reset(mbuf);
|
|
- mbuf->pool = NULL;
|
|
- }
|
|
-
|
|
- return 0;
|
|
+ rte_pktmbuf_free(mbuf);
|
|
}
|
|
|
|
int32_t gazelle_alloc_pktmbuf(struct rte_mempool *pool, struct rte_mbuf **mbufs, uint32_t num)
|
|
{
|
|
struct pbuf_custom *pbuf_custom = NULL;
|
|
|
|
- int32_t ret = alloc_mbufs(pool, mbufs, num);
|
|
+ int32_t ret = rte_pktmbuf_alloc_bulk(pool, mbufs, num);
|
|
if (ret != 0) {
|
|
return ret;
|
|
}
|
|
@@ -252,86 +183,98 @@ int32_t gazelle_alloc_pktmbuf(struct rte_mempool *pool, struct rte_mbuf **mbufs,
|
|
return 0;
|
|
}
|
|
|
|
-struct pbuf *lwip_alloc_pbuf(pbuf_layer layer, uint16_t length, pbuf_type type)
|
|
+static struct pbuf *init_mbuf_to_pbuf(struct rte_mbuf *mbuf, pbuf_layer layer, uint16_t length, pbuf_type type)
|
|
{
|
|
- struct rte_mbuf *mbuf;
|
|
- int32_t ret = alloc_mbufs(get_protocol_stack()->tx_pktmbuf_pool, &mbuf, 1);
|
|
- if (ret != 0) {
|
|
- get_protocol_stack()->stats.tx_allocmbuf_fail++;
|
|
- return NULL;
|
|
- }
|
|
-
|
|
struct pbuf_custom *pbuf_custom = mbuf_to_pbuf(mbuf);
|
|
pbuf_custom->custom_free_function = gazelle_free_pbuf;
|
|
|
|
void *data = rte_pktmbuf_mtod(mbuf, void *);
|
|
struct pbuf *pbuf = pbuf_alloced_custom(layer, length, type, pbuf_custom, data, MAX_PACKET_SZ);
|
|
-
|
|
-#if CHECKSUM_CHECK_IP_HW || CHECKSUM_CHECK_TCP_HW
|
|
if (pbuf) {
|
|
pbuf->ol_flags = 0;
|
|
pbuf->l2_len = 0;
|
|
pbuf->l3_len = 0;
|
|
}
|
|
-#endif
|
|
|
|
return pbuf;
|
|
}
|
|
|
|
-struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags)
|
|
+static int32_t lwip_alloc_pbufs(pbuf_layer layer, uint16_t length, pbuf_type type, void **bufs, uint32_t num)
|
|
{
|
|
- struct pbuf *pbuf = NULL;
|
|
+ int32_t ret = rte_pktmbuf_alloc_bulk(get_protocol_stack()->tx_pktmbuf_pool, (struct rte_mbuf **)bufs, num);
|
|
+ if (ret != 0) {
|
|
+ get_protocol_stack()->stats.tx_allocmbuf_fail++;
|
|
+ return -1;
|
|
+ }
|
|
|
|
- if (sock->send_lastdata) {
|
|
- pbuf = sock->send_lastdata;
|
|
- sock->send_lastdata = NULL;
|
|
- } else {
|
|
- int32_t ret = rte_ring_sc_dequeue(sock->send_ring, (void **)&pbuf);
|
|
- if (ret != 0) {
|
|
- *apiflags &= ~TCP_WRITE_FLAG_MORE;
|
|
- return NULL;
|
|
- }
|
|
+ for (uint32_t i = 0; i < num; i++) {
|
|
+ bufs[i] = init_mbuf_to_pbuf(bufs[i], layer, length, type);
|
|
}
|
|
|
|
- if (pbuf->tot_len >= remain_size) {
|
|
- sock->send_lastdata = pbuf;
|
|
- *apiflags |= TCP_WRITE_FLAG_MORE; /* set TCP_PSH flag */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+struct pbuf *lwip_alloc_pbuf(pbuf_layer layer, uint16_t length, pbuf_type type)
|
|
+{
|
|
+ struct pbuf *pbuf;
|
|
+
|
|
+ if (lwip_alloc_pbufs(layer, length, type, (void **)&pbuf, 1) != 0) {
|
|
return NULL;
|
|
}
|
|
|
|
- replenish_send_idlembuf(sock->send_idle_ring);
|
|
+ return pbuf;
|
|
+}
|
|
|
|
- if ((sock->epoll_events & EPOLLOUT) && rte_ring_free_count(sock->send_ring)) {
|
|
- add_epoll_event(sock->conn, EPOLLOUT);
|
|
+struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags)
|
|
+{
|
|
+ struct pbuf *pbuf = NULL;
|
|
+
|
|
+ if (gazelle_ring_sc_dequeue(sock->send_ring, (void **)&pbuf, 1) != 1) {
|
|
+ *apiflags &= ~TCP_WRITE_FLAG_MORE;
|
|
+ return NULL;
|
|
}
|
|
|
|
sock->stack->stats.write_lwip_cnt++;
|
|
return pbuf;
|
|
}
|
|
|
|
+static inline void del_data_out_event(struct lwip_sock *sock)
|
|
+{
|
|
+ pthread_spin_lock(&sock->wakeup->event_list_lock);
|
|
+
|
|
+ /* check again avoid cover event add in stack thread */
|
|
+ if (!NETCONN_IS_OUTIDLE(sock)) {
|
|
+ sock->events &= ~EPOLLOUT;
|
|
+
|
|
+ if (sock->events == 0) {
|
|
+ list_del_node_null(&sock->event_list);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ pthread_spin_unlock(&sock->wakeup->event_list_lock);
|
|
+}
|
|
+
|
|
ssize_t write_stack_data(struct lwip_sock *sock, const void *buf, size_t len)
|
|
{
|
|
- if (sock->events & EPOLLERR) {
|
|
+ if (sock->errevent > 0) {
|
|
return 0;
|
|
}
|
|
|
|
- uint32_t free_count = rte_ring_free_count(sock->send_ring);
|
|
+ uint32_t free_count = gazelle_ring_readable_count(sock->send_ring);
|
|
if (free_count == 0) {
|
|
return -1;
|
|
}
|
|
|
|
- uint32_t avaible_cont = rte_ring_count(sock->send_idle_ring);
|
|
- avaible_cont = LWIP_MIN(free_count, avaible_cont);
|
|
-
|
|
struct pbuf *pbuf = NULL;
|
|
ssize_t send_len = 0;
|
|
size_t copy_len;
|
|
uint32_t send_pkt = 0;
|
|
|
|
- while (send_len < len && send_pkt < avaible_cont) {
|
|
- int32_t ret = rte_ring_sc_dequeue(sock->send_idle_ring, (void **)&pbuf);
|
|
- if (ret < 0) {
|
|
- sock->stack->stats.app_write_idlefail++;
|
|
+ while (send_len < len && send_pkt < free_count) {
|
|
+ if (gazelle_ring_read(sock->send_ring, (void **)&pbuf, 1) != 1) {
|
|
+ if (sock->wakeup) {
|
|
+ sock->wakeup->stat.app_write_idlefail++;
|
|
+ }
|
|
break;
|
|
}
|
|
|
|
@@ -339,21 +282,42 @@ ssize_t write_stack_data(struct lwip_sock *sock, const void *buf, size_t len)
|
|
pbuf_take(pbuf, (char *)buf + send_len, copy_len);
|
|
pbuf->tot_len = pbuf->len = copy_len;
|
|
|
|
- ret = rte_ring_sp_enqueue(sock->send_ring, pbuf);
|
|
- if (ret != 0) {
|
|
- sock->stack->stats.app_write_drop++;
|
|
- pbuf_free(pbuf);
|
|
- break;
|
|
- }
|
|
-
|
|
send_len += copy_len;
|
|
send_pkt++;
|
|
}
|
|
- __sync_fetch_and_add(&sock->stack->stats.app_write_cnt, send_pkt);
|
|
+ gazelle_ring_read_over(sock->send_ring);
|
|
+
|
|
+ if (sock->wakeup) {
|
|
+ sock->wakeup->stat.app_write_cnt += send_pkt;
|
|
+ if (sock->wakeup->type == WAKEUP_EPOLL && (sock->events & EPOLLOUT)) {
|
|
+ del_data_out_event(sock);
|
|
+ }
|
|
+ }
|
|
|
|
return (send_len <= 0) ? -1 : send_len;
|
|
}
|
|
|
|
+static void do_lwip_send(int32_t fd, struct lwip_sock *sock, int32_t flags)
|
|
+{
|
|
+ /* send all send_ring, so len set lwip send max. */
|
|
+ ssize_t len = lwip_send(fd, sock, UINT16_MAX, flags);
|
|
+ if (len == 0) {
|
|
+ /* FIXME: should use POLLRDHUP, when connection be closed. lwip event-callback no POLLRDHUP */
|
|
+ sock->errevent = 1;
|
|
+ add_epoll_event(sock->conn, EPOLLERR);
|
|
+ }
|
|
+
|
|
+ if (gazelle_ring_readable_count(sock->send_ring) < SOCK_SEND_REPLENISH_THRES) {
|
|
+ replenish_send_idlembuf(sock->send_ring);
|
|
+ }
|
|
+
|
|
+ if (len > 0) {
|
|
+ if ((sock->epoll_events & EPOLLOUT) && NETCONN_IS_OUTIDLE(sock)) {
|
|
+ add_epoll_event(sock->conn, EPOLLOUT);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
void stack_send(struct rpc_msg *msg)
|
|
{
|
|
int32_t fd = msg->args[MSG_ARG_0].i;
|
|
@@ -369,17 +333,11 @@ void stack_send(struct rpc_msg *msg)
|
|
return;
|
|
}
|
|
|
|
- /* send all send_ring, so len set lwip send max. */
|
|
- ssize_t len = lwip_send(fd, sock, UINT16_MAX, flags);
|
|
- if (len == 0) {
|
|
- /* FIXME: should use POLLRDHUP, when connection be closed. lwip event-callback no POLLRDHUP */
|
|
- add_epoll_event(sock->conn, EPOLLERR);
|
|
- }
|
|
+ do_lwip_send(fd, sock, flags);
|
|
|
|
/* have remain data add sendlist */
|
|
if (NETCONN_IS_DATAOUT(sock)) {
|
|
- if (list_is_empty(&sock->send_list)) {
|
|
- sock->send_flags = flags;
|
|
+ if (list_is_null(&sock->send_list)) {
|
|
list_add_node(&sock->stack->send_list, &sock->send_list);
|
|
}
|
|
sock->stack->stats.send_self_rpc++;
|
|
@@ -396,20 +354,14 @@ void send_stack_list(struct protocol_stack *stack, uint32_t send_max)
|
|
sock = container_of(node, struct lwip_sock, send_list);
|
|
|
|
if (sock->conn == NULL || !NETCONN_IS_DATAOUT(sock)) {
|
|
- list_del_node_init(&sock->send_list);
|
|
+ list_del_node_null(&sock->send_list);
|
|
continue;
|
|
}
|
|
|
|
- /* send all send_ring, so len set lwip send max. */
|
|
- ssize_t len = lwip_send(sock->conn->socket, sock, UINT16_MAX, sock->send_flags);
|
|
- if (len == 0) {
|
|
- /* FIXME: should use POLLRDHUP, when connection be closed. lwip event-callback no POLLRDHUP */
|
|
- add_epoll_event(sock->conn, EPOLLERR);
|
|
- list_del_node_init(&sock->send_list);
|
|
- }
|
|
+ do_lwip_send(sock->conn->socket, sock, 0);
|
|
|
|
if (!NETCONN_IS_DATAOUT(sock)) {
|
|
- list_del_node_init(&sock->send_list);
|
|
+ list_del_node_null(&sock->send_list);
|
|
}
|
|
|
|
if (++read_num >= send_max) {
|
|
@@ -418,26 +370,39 @@ void send_stack_list(struct protocol_stack *stack, uint32_t send_max)
|
|
}
|
|
}
|
|
|
|
+static inline void free_recv_ring_readover(struct rte_ring *ring)
|
|
+{
|
|
+ void *pbufs[SOCK_RECV_RING_SIZE];
|
|
+ uint32_t num = gazelle_ring_sc_dequeue(ring, pbufs, RING_SIZE(SOCK_RECV_RING_SIZE));
|
|
+ for (uint32_t i = 0; i < num; i++) {
|
|
+ pbuf_free(pbufs[i]);
|
|
+ }
|
|
+}
|
|
+
|
|
ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags)
|
|
{
|
|
if (sock->conn->recvmbox == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
- if (rte_ring_count(sock->recv_wait_free)) {
|
|
- free_ring_pbuf(sock->recv_wait_free);
|
|
+ if (gazelle_ring_readover_count(sock->recv_ring) >= SOCK_RECV_FREE_THRES) {
|
|
+ free_recv_ring_readover(sock->recv_ring);
|
|
+ }
|
|
+
|
|
+ uint32_t free_count = gazelle_ring_free_count(sock->recv_ring);
|
|
+ if (free_count == 0) {
|
|
+ GAZELLE_RETURN(EAGAIN);
|
|
}
|
|
|
|
- uint32_t free_count = rte_ring_free_count(sock->recv_ring);
|
|
uint32_t data_count = rte_ring_count(sock->conn->recvmbox->ring);
|
|
- uint32_t read_max = LWIP_MIN(free_count, data_count);
|
|
- struct pbuf *pbuf = NULL;
|
|
+ uint32_t read_num = LWIP_MIN(free_count, data_count);
|
|
+ read_num = LWIP_MIN(read_num, SOCK_RECV_RING_SIZE);
|
|
+ struct pbuf *pbufs[SOCK_RECV_RING_SIZE];
|
|
uint32_t read_count = 0;
|
|
ssize_t recv_len = 0;
|
|
- int32_t ret;
|
|
|
|
- for (uint32_t i = 0; i < read_max; i++) {
|
|
- err_t err = netconn_recv_tcp_pbuf_flags(sock->conn, &pbuf, apiflags);
|
|
+ for (uint32_t i = 0; i < read_num; i++) {
|
|
+ err_t err = netconn_recv_tcp_pbuf_flags(sock->conn, &pbufs[i], apiflags);
|
|
if (err != ERR_OK) {
|
|
if (recv_len > 0) {
|
|
/* already received data, return that (this trusts in getting the same error from
|
|
@@ -448,35 +413,28 @@ ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags)
|
|
return (err == ERR_CLSD) ? 0 : -1;
|
|
}
|
|
|
|
- if (!(flags & MSG_PEEK)) {
|
|
- ret = rte_ring_sp_enqueue(sock->recv_ring, pbuf);
|
|
- if (ret != 0) {
|
|
- pbuf_free(pbuf);
|
|
- sock->stack->stats.read_lwip_drop++;
|
|
- break;
|
|
- }
|
|
- read_count++;
|
|
- }
|
|
-
|
|
- if (get_protocol_stack_group()->latency_start) {
|
|
- calculate_lstack_latency(&sock->stack->latency, pbuf, GAZELLE_LATENCY_LWIP);
|
|
- }
|
|
-
|
|
- recv_len += pbuf->len;
|
|
+ recv_len += pbufs[i]->tot_len;
|
|
+ read_count++;
|
|
|
|
/* once we have some data to return, only add more if we don't need to wait */
|
|
apiflags |= NETCONN_DONTBLOCK | NETCONN_NOFIN;
|
|
}
|
|
|
|
- if (data_count > read_count) {
|
|
- add_recv_list(sock->conn->socket);
|
|
+ if (!(flags & MSG_PEEK)) {
|
|
+ uint32_t enqueue_num = gazelle_ring_sp_enqueue(sock->recv_ring, (void **)pbufs, read_count);
|
|
+ for (uint32_t i = enqueue_num; i < read_count; i++) {
|
|
+ /* update receive window */
|
|
+ tcp_recved(sock->conn->pcb.tcp, pbufs[i]->tot_len);
|
|
+ pbuf_free(pbufs[i]);
|
|
+ sock->stack->stats.read_lwip_drop++;
|
|
+ }
|
|
}
|
|
|
|
- if (recv_len > 0 && (flags & MSG_PEEK) == 0) {
|
|
- add_epoll_event(sock->conn, EPOLLIN);
|
|
+ for (uint32_t i = 0; get_protocol_stack_group()->latency_start && i < read_count; i++) {
|
|
+ calculate_lstack_latency(&sock->stack->latency, pbufs[i], GAZELLE_LATENCY_LWIP);
|
|
}
|
|
- sock->stack->stats.read_lwip_cnt += read_count;
|
|
|
|
+ sock->stack->stats.read_lwip_cnt += read_count;
|
|
if (recv_len == 0) {
|
|
GAZELLE_RETURN(EAGAIN);
|
|
}
|
|
@@ -532,14 +490,12 @@ ssize_t gazelle_send(int32_t fd, const void *buf, size_t len, int32_t flags)
|
|
GAZELLE_RETURN(EINVAL);
|
|
}
|
|
|
|
- sock->send_flags = flags;
|
|
ssize_t send = write_stack_data(sock, buf, len);
|
|
if (send < 0) {
|
|
GAZELLE_RETURN(EAGAIN);
|
|
} else if (send == 0) {
|
|
return 0;
|
|
}
|
|
- rte_smp_mb();
|
|
|
|
rpc_call_send(fd, NULL, send, flags);
|
|
return send;
|
|
@@ -574,22 +530,52 @@ ssize_t sendmsg_to_stack(int32_t s, const struct msghdr *message, int32_t flags)
|
|
return buflen;
|
|
}
|
|
|
|
+static inline void del_data_in_event(struct lwip_sock *sock)
|
|
+{
|
|
+ pthread_spin_lock(&sock->wakeup->event_list_lock);
|
|
+
|
|
+ /* check again avoid cover event add in stack thread */
+ if (!NETCONN_IS_DATAIN(sock)) {
+ sock->events &= ~EPOLLIN;
+
+ if (sock->events == 0) {
+ list_del_node_null(&sock->event_list);
+ }
+ }
+
+ pthread_spin_unlock(&sock->wakeup->event_list_lock);
+}
+
+static struct pbuf *pbuf_free_partial(struct pbuf *pbuf, uint16_t free_len)
+{
+ while (free_len && pbuf) {
+ if (free_len >= pbuf->len) {
+ struct pbuf *p = pbuf;
+ pbuf = pbuf->next;
+ free_len = free_len - p->len;
+ } else {
+ pbuf_remove_header(pbuf, free_len);
+ break;
+ }
+ }
+
+ return pbuf;
+}
+
ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
{
size_t recv_left = len;
struct pbuf *pbuf = NULL;
ssize_t recvd = 0;
- int32_t ret;
- u16_t copy_len;
+ uint16_t copy_len;

struct lwip_sock *sock = get_socket(fd);
if (sock == NULL) {
LSTACK_LOG(ERR, LSTACK, "get_socket null fd %d.\n", fd);
GAZELLE_RETURN(EINVAL);
}
- sock->recv_flags = flags;

- if ((sock->events & EPOLLERR) && !NETCONN_IS_DATAIN(sock)) {
+ if (sock->errevent > 0) {
return 0;
}

@@ -598,35 +584,39 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)
pbuf = sock->recv_lastdata;
sock->recv_lastdata = NULL;
} else {
- ret = rte_ring_sc_dequeue(sock->recv_ring, (void **)&pbuf);
- if (ret != 0) {
+ if (gazelle_ring_read(sock->recv_ring, (void **)&pbuf, 1) != 1) {
break;
}
}

- copy_len = (recv_left > pbuf->tot_len) ? pbuf->tot_len : (u16_t)recv_left;
+ copy_len = (recv_left > pbuf->len) ? pbuf->len : (uint16_t)recv_left;
pbuf_copy_partial(pbuf, (char *)buf + recvd, copy_len, 0);

recvd += copy_len;
recv_left -= copy_len;

- if (pbuf->tot_len > copy_len) {
- sock->recv_lastdata = pbuf_free_header(pbuf, copy_len);
+ if (pbuf->len > copy_len || pbuf->next) {
+ sock->recv_lastdata = pbuf_free_partial(pbuf, copy_len);
} else {
+ if (sock->wakeup) {
+ sock->wakeup->stat.app_read_cnt += 1;
+ }
if (get_protocol_stack_group()->latency_start) {
calculate_lstack_latency(&sock->stack->latency, pbuf, GAZELLE_LATENCY_READ);
}
- ret = rte_ring_sp_enqueue(sock->recv_wait_free, pbuf);
- if (ret != 0) {
- pbuf_free(pbuf);
- }
- sock->recv_lastdata = NULL;
- __sync_fetch_and_add(&sock->stack->stats.app_read_cnt, 1);
+ gazelle_ring_read_over(sock->recv_ring);
}
}

+ /* cheap readable-count check first, so the event list lock is only taken when EPOLLIN may need clearing */
+ if (sock->wakeup && sock->wakeup->type == WAKEUP_EPOLL && (sock->events & EPOLLIN)) {
+ del_data_in_event(sock);
+ }
+
if (recvd == 0) {
- sock->stack->stats.read_null++;
+ if (sock->wakeup) {
+ sock->wakeup->stat.read_null++;
+ }
GAZELLE_RETURN(EAGAIN);
}
return recvd;
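read_stack_data() above consumes recv_ring in two phases. A compact sketch of the pattern, with the ring semantics inferred from the call sites in this patch (gazelle_ring_read() hands out entries without releasing them; gazelle_ring_read_over() commits the reads so the producer side can recycle the slots):

    static ssize_t drain_one_pbuf(struct lwip_sock *sock, char *dst, uint16_t want)
    {
        struct pbuf *p = NULL;

        if (gazelle_ring_read(sock->recv_ring, (void **)&p, 1) != 1) {
            return 0;                                   /* nothing readable */
        }

        uint16_t copy = (want > p->len) ? p->len : want;
        pbuf_copy_partial(p, dst, copy, 0);

        if (p->len > copy || p->next) {
            sock->recv_lastdata = pbuf_free_partial(p, copy); /* keep unread tail */
        } else {
            gazelle_ring_read_over(sock->recv_ring);    /* commit: slot reusable */
        }
        return copy;
    }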
@@ -634,9 +624,9 @@ ssize_t read_stack_data(int32_t fd, void *buf, size_t len, int32_t flags)

void add_recv_list(int32_t fd)
{
- struct lwip_sock *sock = get_socket(fd);
+ struct lwip_sock *sock = get_socket_by_fd(fd);

- if (sock->stack && list_is_empty(&sock->recv_list)) {
+ if (sock && sock->stack && list_is_null(&sock->recv_list)) {
list_add_node(&sock->stack->recv_list, &sock->recv_list);
}
}
@@ -648,24 +638,26 @@ void read_recv_list(struct protocol_stack *stack, uint32_t max_num)
struct lwip_sock *sock;
uint32_t read_num = 0;

+ struct list_node *last_node = list->prev;
list_for_each_safe(node, temp, list) {
sock = container_of(node, struct lwip_sock, recv_list);

- if (sock->conn == NULL || sock->recv_ring == NULL || sock->send_ring == NULL || sock->conn->pcb.tcp == NULL) {
- list_del_node_init(&sock->recv_list);
+ if (sock->conn == NULL || sock->conn->recvmbox == NULL || rte_ring_count(sock->conn->recvmbox->ring) == 0) {
+ list_del_node_null(&sock->recv_list);
continue;
}

- if (rte_ring_free_count(sock->recv_ring)) {
- list_del_node_init(&sock->recv_list);
- ssize_t len = lwip_recv(sock->conn->socket, NULL, 0, sock->recv_flags);
- if (len == 0) {
- /* FIXME: should use POLLRDHUP, when connection be closed. lwip event-callback no POLLRDHUP */
- add_epoll_event(sock->conn, EPOLLERR);
- }
+ ssize_t len = lwip_recv(sock->conn->socket, NULL, 0, 0);
+ if (len == 0) {
+ /* FIXME: should use POLLRDHUP, when connection be closed. lwip event-callback no POLLRDHUP */
+ sock->errevent = 1;
+ add_epoll_event(sock->conn, EPOLLERR);
+ } else if (len > 0) {
+ add_epoll_event(sock->conn, EPOLLIN);
}

- if (++read_num >= max_num) {
+ /* last_node: visit each sock at most once per pass; max_num bounds the time spent in this loop */
+ if (++read_num >= max_num || last_node == node) {
break;
}
}
@@ -684,14 +676,14 @@ static void copy_pcb_to_conn(struct gazelle_stat_lstack_conn_info *conn, const s

if (netconn != NULL && netconn->recvmbox != NULL) {
conn->recv_cnt = rte_ring_count(netconn->recvmbox->ring);
+ conn->fd = netconn->socket;

struct lwip_sock *sock = get_socket(netconn->socket);
if (netconn->socket > 0 && sock != NULL && sock->recv_ring != NULL && sock->send_ring != NULL) {
- conn->recv_ring_cnt = rte_ring_count(sock->recv_ring);
+ conn->recv_ring_cnt = gazelle_ring_readable_count(sock->recv_ring);
conn->recv_ring_cnt += (sock->recv_lastdata) ? 1 : 0;

- conn->send_ring_cnt = rte_ring_count(sock->send_ring);
- conn->send_ring_cnt += (sock->send_lastdata) ? 1 : 0;
+ conn->send_ring_cnt = gazelle_ring_readover_count(sock->send_ring);

if (sock->wakeup) {
sem_getvalue(&sock->wakeup->event_sem, &conn->sem_cnt);
@@ -756,9 +748,11 @@ void create_shadow_fd(struct rpc_msg *msg)
}

clone_lwip_socket_opt(clone_sock, sock);
- clone_sock->shadowed_sock = sock;

- listen_list_add_node(fd, clone_fd);
+ while (sock->listen_next) {
+ sock = sock->listen_next;
+ }
+ sock->listen_next = clone_sock;

int32_t ret = lwip_bind(clone_fd, addr, addr_len);
if (ret < 0) {
@@ -843,11 +837,6 @@ static uint32_t get_list_count(struct list_node *list)
return count;
}

-void stack_eventlist_count(struct rpc_msg *msg)
-{
- msg->result = get_list_count(&get_protocol_stack()->event_list);
-}
-
void stack_sendlist_count(struct rpc_msg *msg)
{
msg->result = get_list_count(&get_protocol_stack()->send_list);
diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c
index 88513ba..a1f3790 100644
--- a/src/lstack/core/lstack_protocol_stack.c
+++ b/src/lstack/core/lstack_protocol_stack.c
@@ -11,6 +11,7 @@
*/
#define _GNU_SOURCE
#include <pthread.h>
+#include <stdatomic.h>

#include <lwip/sockets.h>
#include <lwip/tcpip.h>
@@ -30,6 +31,7 @@
#include "lstack_protocol_stack.h"
#include "lstack_cfg.h"
#include "lstack_control_plane.h"
+#include "posix/lstack_epoll.h"
#include "lstack_stack_stat.h"

#define READ_LIST_MAX 32
@@ -39,7 +41,6 @@

static PER_THREAD uint16_t g_stack_idx = PROTOCOL_STACK_MAX;
static struct protocol_stack_group g_stack_group = {0};
-static PER_THREAD long g_stack_tid = 0;

void set_init_fail(void);
typedef void *(*stack_thread_func)(void *arg);
@@ -66,6 +67,8 @@ static inline void set_stack_idx(uint16_t idx)

long get_stack_tid(void)
{
+ static PER_THREAD long g_stack_tid = 0;
+
if (g_stack_tid == 0) {
g_stack_tid = syscall(__NR_gettid);
}
@@ -96,17 +99,37 @@ struct protocol_stack *get_protocol_stack_by_fd(int32_t fd)
return sock->stack;
}

-struct protocol_stack *get_minconn_protocol_stack(void)
+struct protocol_stack *get_bind_protocol_stack(void)
{
- int32_t min_index = 0;
+ static PER_THREAD struct protocol_stack *bind_stack = NULL;
+
+ /* the same app communication thread always binds to the same stack */
+ if (bind_stack) {
+ return bind_stack;
+ }

- for (int32_t i = 1; i < g_stack_group.stack_num; i++) {
- if (g_stack_group.stacks[i]->conn_num < g_stack_group.stacks[min_index]->conn_num) {
- min_index = i;
+ struct protocol_stack_group *stack_group = get_protocol_stack_group();
+ uint16_t index = 0;
+
+ /* listen shadow disabled: each app communication thread selects exactly one stack */
+ if (get_global_cfg_params()->listen_shadow == false) {
+ static uint16_t stack_index = 0;
+ index = atomic_fetch_add(&stack_index, 1);
+ if (index >= stack_group->stack_num) {
+ LSTACK_LOG(ERR, LSTACK, "thread =%hu larger than stack num = %hu\n", index, stack_group->stack_num);
+ return NULL;
+ }
+ /* listen shadow enabled: app communication threads may outnumber stacks, so pick the least-loaded stack */
+ } else {
+ for (uint16_t i = 1; i < stack_group->stack_num; i++) {
+ if (stack_group->stacks[i]->conn_num < stack_group->stacks[index]->conn_num) {
+ index = i;
+ }
}
}

- return g_stack_group.stacks[min_index];
+ bind_stack = stack_group->stacks[index];
+ return stack_group->stacks[index];
}
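A usage note on the function above: the first call from an app thread picks a stack (round-robin without listen shadow, least-loaded with it) and caches the pointer in a per-thread variable, so every later call is a plain read with no synchronization. A hypothetical caller, just to show the contract:

    static int32_t bind_thread_example(void)
    {
        struct protocol_stack *stack = get_bind_protocol_stack(); /* cached per thread */
        if (stack == NULL) {
            return -1;  /* listen_shadow=0 and app threads outnumber stacks */
        }
        return 0;       /* later sockets of this thread land on this stack */
    }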

void lstack_low_power_idling(void)
@@ -193,7 +216,7 @@ static int32_t create_thread(uint16_t queue_id, char *thread_name, stack_thread_
return 0;
}

-static void* gazelle_weakup_thread(void *arg)
+static void* gazelle_wakeup_thread(void *arg)
{
uint16_t queue_id = *(uint16_t *)arg;
struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id];
@@ -203,17 +226,13 @@ static void* gazelle_weakup_thread(void *arg)

LSTACK_LOG(INFO, LSTACK, "weakup_%02d start\n", stack->queue_id);

+ sem_t *event_sem[WAKEUP_MAX_NUM];
+ int num;
for (;;) {
- if (rte_ring_count(stack->wakeup_ring) == 0) {
- continue;
- }
-
- sem_t *event_sem;
- if (rte_ring_sc_dequeue(stack->wakeup_ring, (void **)&event_sem)) {
- continue;
+ num = gazelle_light_ring_dequeue_burst(stack->wakeup_ring, (void **)event_sem, WAKEUP_MAX_NUM);
+ for (int i = 0; i < num; i++) {
+ sem_post(event_sem[i]);
}
-
- sem_post(event_sem);
}

return NULL;
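The wakeup thread now drains up to WAKEUP_MAX_NUM semaphores per iteration instead of one dequeue per loop. The producer side is not shown in this hunk; a sketch of what it presumably looks like, using the single-producer enqueue from dpdk_common.h (which returns 0 when the ring is full and n otherwise) — the inline fallback is an assumption, not code from this patch:

    static void wake_sleepers(struct protocol_stack *stack, sem_t **sems, uint32_t n)
    {
        uint32_t ok = gazelle_light_ring_enqueue_busrt(stack->wakeup_ring,
                                                       (void **)sems, n);
        if (ok == 0) {
            for (uint32_t i = 0; i < n; i++) {
                sem_post(sems[i]);   /* assumed fallback: post inline if ring full */
            }
        }
    }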
@@ -233,12 +252,8 @@ static int32_t init_stack_value(struct protocol_stack *stack, uint16_t queue_id)
stack->lwip_stats = &lwip_stats;

init_list_node(&stack->recv_list);
- init_list_node(&stack->listen_list);
- init_list_node(&stack->event_list);
init_list_node(&stack->send_list);

- pthread_spin_init(&stack->event_lock, PTHREAD_PROCESS_SHARED);
-
sys_calibrate_tsc();
stack_stat_init();

@@ -297,8 +312,10 @@ static void* gazelle_kernel_event(void *arg)
}

for (int32_t i = 0; i < event_num; i++) {
- if (events[i].data.ptr) {
- sem_post((sem_t *)events[i].data.ptr);
+ struct wakeup_poll *wakeup = events[i].data.ptr;
+ if (wakeup) {
+ __atomic_store_n(&wakeup->have_kernel_event, true, __ATOMIC_RELEASE);
+ sem_post(&wakeup->event_sem);
}
}
}
@@ -311,7 +328,7 @@ static int32_t create_companion_thread(struct protocol_stack_group *stack_group,
int32_t ret;

if (stack_group->wakeup_enable) {
- ret = create_thread(stack->queue_id, "gazelleweakup", gazelle_weakup_thread);
+ ret = create_thread(stack->queue_id, "gazelleweakup", gazelle_wakeup_thread);
if (ret != 0) {
LSTACK_LOG(ERR, LSTACK, "gazelleweakup ret=%d errno=%d\n", ret, errno);
return ret;
@@ -339,13 +356,11 @@ static struct protocol_stack * stack_thread_init(uint16_t queue_id)

struct protocol_stack *stack = malloc(sizeof(*stack));
if (stack == NULL) {
- sem_post(&stack_group->thread_phase1);
LSTACK_LOG(ERR, LSTACK, "malloc stack failed\n");
return NULL;
}

if (init_stack_value(stack, queue_id) != 0) {
- sem_post(&stack_group->thread_phase1);
free(stack);
return NULL;
}
@@ -358,7 +373,6 @@ static struct protocol_stack * stack_thread_init(uint16_t queue_id)

if (use_ltran()) {
if (client_reg_thrd_ring() != 0) {
- sem_post(&stack_group->thread_phase1);
free(stack);
return NULL;
}
@@ -419,6 +433,8 @@ static int32_t init_protocol_sem(void)
int32_t ret;
struct protocol_stack_group *stack_group = get_protocol_stack_group();

+ pthread_spin_init(&stack_group->wakeup_list_lock, PTHREAD_PROCESS_PRIVATE);
+
if (!use_ltran()) {
ret = sem_init(&stack_group->ethdev_init, 0, 0);
if (ret < 0) {
@@ -449,6 +465,7 @@ int32_t init_protocol_stack(void)

stack_group->stack_num = get_global_cfg_params()->num_cpu;
stack_group->wakeup_enable = (get_global_cfg_params()->num_wakeup > 0) ? true : false;
+ stack_group->wakeup_list = NULL;

if (init_protocol_sem() != 0) {
return -1;
@@ -486,58 +503,10 @@ void stack_socket(struct rpc_msg *msg)
}
}

-static inline bool is_real_close(int32_t fd)
-{
- struct lwip_sock *sock = get_socket_by_fd(fd);
-
- /* last sock */
- if (list_is_empty(&sock->attach_list)) {
- return true;
- }
-
- /* listen sock, but have attach sock */
- if (sock->attach_fd == fd) {
- sock->wait_close = true;
- return false;
- } else { /* attach sock */
- /* listen sock is normal */
- struct lwip_sock *listen_sock = get_socket_by_fd(sock->attach_fd);
- if (listen_sock == NULL || !listen_sock->wait_close) {
- list_del_node_init(&sock->attach_list);
- return true;
- }
-
- /* listen sock is wait clsoe. check this is last attach sock */
- struct list_node *list = &(sock->attach_list);
- struct list_node *node, *temp;
- uint32_t list_count = 0;
- list_for_each_safe(node, temp, list) {
- list_count++;
- }
- /* 2:listen sock is wait close and closing attach sock. close listen sock here */
- if (list_count == 2) {
- lwip_close(listen_sock->attach_fd);
- gazelle_clean_sock(listen_sock->attach_fd);
- posix_api->close_fn(listen_sock->attach_fd);
- list_del_node_init(&listen_sock->attach_list);
- }
- list_del_node_init(&sock->attach_list);
- return true;
- }
-
- list_del_node_init(&sock->attach_list);
- return true;
-}
-
void stack_close(struct rpc_msg *msg)
{
int32_t fd = msg->args[MSG_ARG_0].i;

- if (!is_real_close(fd)) {
- msg->result = 0;
- return;
- }
-
msg->result = lwip_close(fd);
if (msg->result != 0) {
LSTACK_LOG(ERR, LSTACK, "tid %ld, fd %d failed %ld\n", get_stack_tid(), msg->args[MSG_ARG_0].i, msg->result);
@@ -556,26 +525,8 @@ void stack_bind(struct rpc_msg *msg)
}
}

-static inline struct lwip_sock *reuse_listen(struct protocol_stack *stack, struct lwip_sock *listen_sock)
-{
- struct list_node *node, *temp;
- struct list_node *list = &(stack->listen_list);
- struct lwip_sock *sock;
-
- list_for_each_safe(node, temp, list) {
- sock = container_of(node, struct lwip_sock, listen_list);
- if (sock->conn->pcb.tcp->local_port == listen_sock->conn->pcb.tcp->local_port &&
- sock->conn->pcb.tcp->local_ip.addr == listen_sock->conn->pcb.tcp->local_ip.addr) {
- return sock;
- }
- }
-
- return NULL;
-}
-
void stack_listen(struct rpc_msg *msg)
{
- struct protocol_stack *stack = get_protocol_stack();
int32_t fd = msg->args[MSG_ARG_0].i;
int32_t backlog = msg->args[MSG_ARG_1].i;

@@ -585,25 +536,9 @@ void stack_listen(struct rpc_msg *msg)
return;
}

- /* stack have listen same ip+port, then attach to old listen */
- struct lwip_sock *listen_sock = reuse_listen(stack, sock);
- if (listen_sock) {
- if (list_is_empty(&sock->attach_list)) {
- list_add_node(&listen_sock->attach_list, &sock->attach_list);
- }
- sock->attach_fd = listen_sock->conn->socket;
- msg->result = 0;
- return;
- }
-
/* new listen add to stack listen list */
msg->result = lwip_listen(fd, backlog);
- if (msg->result == 0) {
- if (list_is_empty(&sock->listen_list)) {
- list_add_node(&stack->listen_list, &sock->listen_list);
- }
- sock->attach_fd = fd;
- } else {
+ if (msg->result != 0) {
LSTACK_LOG(ERR, LSTACK, "tid %ld, fd %d failed %ld\n", get_stack_tid(), msg->args[MSG_ARG_0].i, msg->result);
}
}
@@ -611,28 +546,35 @@ void stack_listen(struct rpc_msg *msg)
void stack_accept(struct rpc_msg *msg)
{
int32_t fd = msg->args[MSG_ARG_0].i;
+ msg->result = -1;

int32_t accept_fd = lwip_accept(fd, msg->args[MSG_ARG_1].p, msg->args[MSG_ARG_2].p);
- if (accept_fd > 0) {
- struct lwip_sock *sock = get_socket(accept_fd);
- if (sock && sock->stack) {
- msg->result = accept_fd;
- return;
- }
+ if (accept_fd < 0) {
+ LSTACK_LOG(ERR, LSTACK, "fd %d ret %d\n", fd, accept_fd);
+ return;
+ }

+ struct lwip_sock *sock = get_socket(accept_fd);
+ if (sock == NULL || sock->stack == NULL) {
lwip_close(accept_fd);
gazelle_clean_sock(accept_fd);
posix_api->close_fn(accept_fd);
+ LSTACK_LOG(ERR, LSTACK, "fd %d ret %d\n", fd, accept_fd);
+ return;
}

- LSTACK_LOG(ERR, LSTACK, "tid %ld, fd %d attach_fd %d failed %d\n", get_stack_tid(), msg->args[MSG_ARG_0].i,
- fd, accept_fd);
- msg->result = -1;
+ msg->result = accept_fd;
+ if (rte_ring_count(sock->conn->recvmbox->ring)) {
+ add_recv_list(accept_fd);
+ }
}

void stack_connect(struct rpc_msg *msg)
{
msg->result = lwip_connect(msg->args[MSG_ARG_0].i, msg->args[MSG_ARG_1].p, msg->args[MSG_ARG_2].socklen);
+ if (msg->result < 0) {
+ msg->result = -errno;
+ }
}

void stack_getpeername(struct rpc_msg *msg)
@@ -723,6 +665,7 @@ void stack_broadcast_arp(struct rte_mbuf *mbuf, struct protocol_stack *cur_stack

ret = gazelle_alloc_pktmbuf(stack->rx_pktmbuf_pool, &mbuf_copy, 1);
if (ret != 0) {
+ stack->stats.rx_allocmbuf_fail++;
return;
}
copy_mbuf(mbuf_copy, mbuf);
@@ -737,22 +680,28 @@ void stack_broadcast_arp(struct rte_mbuf *mbuf, struct protocol_stack *cur_stack
/* when fd is listenfd, listenfd of all protocol stack thread will be closed */
int32_t stack_broadcast_close(int32_t fd)
{
- struct lwip_sock *sock = NULL;
- int32_t next_fd;
+ struct lwip_sock *sock = get_socket(fd);
+ int32_t ret = 0;
+
+ do {
+ sock = sock->listen_next;
+ if (rpc_call_close(fd)) {
+ ret = -1;
+ }

- while (fd > 0) {
- sock = get_socket(fd);
if (sock == NULL) {
- LSTACK_LOG(ERR, LSTACK, "tid %ld, %d get sock null\n", get_stack_tid(), fd);
- GAZELLE_RETURN(EINVAL);
+ break;
}
- next_fd = sock->nextfd;
+ fd = sock->conn->socket;
+ } while (sock);

- rpc_call_close(fd);
- fd = next_fd;
- }
+ return ret;
+}

- return 0;
+/* choose one stack to listen on */
+int32_t stack_single_listen(int32_t fd, int32_t backlog)
+{
+ return rpc_call_listen(fd, backlog);
}

/* listen sync to all protocol stack thread, so that any protocol stack thread can build connect */
@@ -797,44 +746,59 @@ int32_t stack_broadcast_listen(int32_t fd, int32_t backlog)
return 0;
}

-/* ergodic the protocol stack thread to find the connection, because all threads are listening */
-int32_t stack_broadcast_accept(int32_t fd, struct sockaddr *addr, socklen_t *addrlen)
+static struct lwip_sock *get_min_accept_sock(int32_t fd)
{
struct lwip_sock *sock = get_socket(fd);
- if (sock == NULL || sock->attach_fd < 0) {
- errno = EINVAL;
- return -1;
- }
- fd = sock->attach_fd;
-
struct lwip_sock *min_sock = NULL;
- int32_t min_fd = fd;
- while (fd > 0) {
- sock = get_socket(fd);
- if (sock == NULL) {
- GAZELLE_RETURN(EINVAL);
- }
- struct lwip_sock *attach_sock = get_socket(sock->attach_fd);
- if (attach_sock == NULL) {
- GAZELLE_RETURN(EINVAL);
- }

- if (!NETCONN_IS_ACCEPTIN(attach_sock)) {
- fd = sock->nextfd;
+ while (sock) {
+ if (!NETCONN_IS_ACCEPTIN(sock)) {
+ sock = sock->listen_next;
continue;
}

- if (min_sock == NULL || min_sock->stack->conn_num > attach_sock->stack->conn_num) {
- min_sock = attach_sock;
- min_fd = sock->attach_fd;
+ if (min_sock == NULL || min_sock->stack->conn_num > sock->stack->conn_num) {
+ min_sock = sock;
}

- fd = sock->nextfd;
+ sock = sock->listen_next;
+ }
+
+ return min_sock;
+}
+
+static inline void del_accept_in_event(struct lwip_sock *sock)
+{
+ pthread_spin_lock(&sock->wakeup->event_list_lock);
+
+ if (!NETCONN_IS_ACCEPTIN(sock)) {
+ sock->events &= ~EPOLLIN;
+ if (sock->events == 0) {
+ list_del_node_null(&sock->event_list);
+ }
+ }

+ pthread_spin_unlock(&sock->wakeup->event_list_lock);
+}
+
+/* traverse the protocol stack threads to find the connection, because all threads are listening */
+int32_t stack_broadcast_accept(int32_t fd, struct sockaddr *addr, socklen_t *addrlen)
+{
int32_t ret = -1;
- if (min_sock) {
- ret = rpc_call_accept(min_fd, addr, addrlen);
+
+ struct lwip_sock *sock = get_socket(fd);
+ if (sock == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ struct lwip_sock *min_sock = get_min_accept_sock(fd);
+ if (min_sock && min_sock->conn) {
+ ret = rpc_call_accept(min_sock->conn->socket, addr, addrlen);
+ }
+
+ if (min_sock && min_sock->wakeup && min_sock->wakeup->type == WAKEUP_EPOLL) {
+ del_accept_in_event(min_sock);
}

if (ret < 0) {
diff --git a/src/lstack/core/lstack_stack_stat.c b/src/lstack/core/lstack_stack_stat.c
index 743857f..06fac5c 100644
--- a/src/lstack/core/lstack_stack_stat.c
+++ b/src/lstack/core/lstack_stack_stat.c
@@ -25,6 +25,7 @@
#include "gazelle_dfx_msg.h"
#include "lstack_thread_rpc.h"
#include "lstack_stack_stat.h"
+#include "posix/lstack_epoll.h"

#define US_PER_SEC 1000000

@@ -87,6 +88,68 @@ static void set_latency_start_flag(bool start)
}
}

+void register_wakeup(struct wakeup_poll *wakeup)
+{
+ struct protocol_stack_group *stack_group = get_protocol_stack_group();
+
+ pthread_spin_lock(&stack_group->wakeup_list_lock);
+
+ wakeup->next = stack_group->wakeup_list;
+ stack_group->wakeup_list = wakeup;
+
+ pthread_spin_unlock(&stack_group->wakeup_list_lock);
+}
+
+void unregister_wakeup(struct wakeup_poll *wakeup)
+{
+ struct protocol_stack_group *stack_group = get_protocol_stack_group();
+
+ pthread_spin_lock(&stack_group->wakeup_list_lock);
+
+ struct wakeup_poll *node = stack_group->wakeup_list;
+ struct wakeup_poll *pre = NULL;
+
+ while (node && node != wakeup) {
+ pre = node;
+ node = node->next;
+ }
+
+ if (node == NULL) {
+ pthread_spin_unlock(&stack_group->wakeup_list_lock);
+ return;
+ }
+
+ if (pre) {
+ pre->next = node->next;
+ } else {
+ stack_group->wakeup_list = node->next;
+ }
+
+ pthread_spin_unlock(&stack_group->wakeup_list_lock);
+}
+
+static void get_wakeup_stat(struct protocol_stack *stack, struct gazelle_wakeup_stat *stat)
+{
+ struct protocol_stack_group *stack_group = get_protocol_stack_group();
+
+ pthread_spin_lock(&stack_group->wakeup_list_lock);
+
+ struct wakeup_poll *node = stack_group->wakeup_list;
+ while (node) {
+ if (node->bind_stack == stack) {
+ stat->app_events += node->stat.app_events;
+ stat->read_null += node->stat.read_null;
+ stat->app_write_cnt += node->stat.app_write_cnt;
+ stat->app_write_idlefail += node->stat.app_write_idlefail;
+ stat->app_read_cnt += node->stat.app_read_cnt;
+ }
+
+ node = node->next;
+ }
+
+ pthread_spin_unlock(&stack_group->wakeup_list_lock);
+}
+
void lstack_get_low_power_info(struct gazelle_stat_low_power_info *low_power_info)
{
struct cfg_params *cfg = get_global_cfg_params();
@@ -102,21 +165,24 @@ static void get_stack_stats(struct gazelle_stack_dfx_data *dfx, struct protocol_
struct protocol_stack_group *stack_group = get_protocol_stack_group();

dfx->loglevel = rte_log_get_level(RTE_LOGTYPE_LSTACK);
+
lstack_get_low_power_info(&dfx->low_power_info);
- memcpy_s(&dfx->data.pkts, sizeof(dfx->data.pkts), &stack->stats, sizeof(dfx->data.pkts));
+
+ memcpy_s(&dfx->data.pkts.stack_stat, sizeof(struct gazelle_stack_stat), &stack->stats,
+ sizeof(struct gazelle_stack_stat));
+
+ get_wakeup_stat(stack, &dfx->data.pkts.wakeup_stat);
+
dfx->data.pkts.call_alloc_fail = stack_group->call_alloc_fail;

int32_t rpc_call_result = rpc_call_msgcnt(stack);
dfx->data.pkts.call_msg_cnt = (rpc_call_result < 0) ? 0 : rpc_call_result;

rpc_call_result = rpc_call_recvlistcnt(stack);
- dfx->data.pkts.recv_list = (rpc_call_result < 0) ? 0 : rpc_call_result;
-
- rpc_call_result = rpc_call_eventlistcnt(stack);
- dfx->data.pkts.event_list = (rpc_call_result < 0) ? 0 : rpc_call_result;
+ dfx->data.pkts.recv_list_cnt = (rpc_call_result < 0) ? 0 : rpc_call_result;

rpc_call_result = rpc_call_sendlistcnt(stack);
- dfx->data.pkts.send_list = (rpc_call_result < 0) ? 0 : rpc_call_result;
+ dfx->data.pkts.send_list_cnt = (rpc_call_result < 0) ? 0 : rpc_call_result;

dfx->data.pkts.conn_num = stack->conn_num;
}
@@ -182,6 +248,8 @@ int32_t handle_stack_cmd(int32_t fd, enum GAZELLE_STAT_MODE stat_mode)

for (uint32_t i = 0; i < stack_group->stack_num; i++) {
struct protocol_stack *stack = stack_group->stacks[i];
+
+ memset_s(&dfx, sizeof(dfx), 0, sizeof(dfx));
get_stack_dfx_data(&dfx, stack, stat_mode);

if (!use_ltran() &&
diff --git a/src/lstack/core/lstack_thread_rpc.c b/src/lstack/core/lstack_thread_rpc.c
index 312e192..8937920 100644
--- a/src/lstack/core/lstack_thread_rpc.c
+++ b/src/lstack/core/lstack_thread_rpc.c
@@ -23,34 +23,53 @@
#include "lstack_dpdk.h"
#include "lstack_thread_rpc.h"

-static PER_THREAD struct rte_mempool *rpc_pool = NULL;
+#define RPC_MSG_MAX 32
+struct rpc_msg_pool {
+ struct rpc_msg msgs[RPC_MSG_MAX];
+ uint32_t prod __rte_cache_aligned;
+ uint32_t cons __rte_cache_aligned;
+};
+
+static PER_THREAD struct rpc_msg_pool *g_rpc_pool = NULL;
+
+static inline __attribute__((always_inline)) struct rpc_msg *get_rpc_msg(struct rpc_msg_pool *rpc_pool)
+{
+ uint32_t cons = __atomic_load_n(&rpc_pool->cons, __ATOMIC_ACQUIRE);
+ uint32_t prod = rpc_pool->prod + 1;
+
+ if (prod == cons) {
+ return NULL;
+ }
+
+ rpc_pool->prod = prod;
+ return &rpc_pool->msgs[prod];
+}
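One caveat with the per-thread rpc_msg_pool above: prod and cons are free-running counters, so the slot index needs a modulo (mask) reduction before addressing the fixed 32-entry msgs[] array; as printed, msgs[prod] walks past the array once prod exceeds RPC_MSG_MAX. A masked sketch of the same allocator — the mask and the fullness test are assumptions layered on top of the code above, not part of the patch:

    static struct rpc_msg *get_rpc_msg_masked(struct rpc_msg_pool *pool)
    {
        uint32_t cons = __atomic_load_n(&pool->cons, __ATOMIC_ACQUIRE);
        uint32_t prod = pool->prod + 1;

        if (prod - cons > RPC_MSG_MAX) {    /* pool exhausted */
            return NULL;
        }

        pool->prod = prod;
        return &pool->msgs[prod & (RPC_MSG_MAX - 1)];  /* RPC_MSG_MAX is 2^n */
    }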

static inline __attribute__((always_inline))
struct rpc_msg *rpc_msg_alloc(struct protocol_stack *stack, rpc_msg_func func)
{
- int32_t ret;
struct rpc_msg *msg = NULL;

if (stack == NULL) {
return NULL;
}

- static uint16_t pool_index = 0;
- if (rpc_pool == NULL) {
- rpc_pool = create_rpc_mempool("rpc_msg", atomic_fetch_add(&pool_index, 1));
- if (rpc_pool == NULL) {
+ if (g_rpc_pool == NULL) {
+ g_rpc_pool = calloc(1, sizeof(struct rpc_msg_pool));
+ if (g_rpc_pool == NULL) {
+ get_protocol_stack_group()->call_alloc_fail++;
return NULL;
}
}

- ret = rte_mempool_get(rpc_pool, (void **)&msg);
- if (ret < 0) {
+ msg = get_rpc_msg(g_rpc_pool);
+ if (msg == NULL) {
get_protocol_stack_group()->call_alloc_fail++;
return NULL;
}
- msg->pool = rpc_pool;
+ msg->pool = g_rpc_pool;

- pthread_spin_init(&msg->lock, PTHREAD_PROCESS_SHARED);
+ pthread_spin_init(&msg->lock, PTHREAD_PROCESS_PRIVATE);
msg->func = func;
msg->self_release = 1;

@@ -64,7 +83,8 @@ void rpc_msg_free(struct rpc_msg *msg)

msg->self_release = 0;
msg->func = NULL;
- rte_mempool_put(msg->pool, (void *)msg);
+
+ atomic_fetch_add(&msg->pool->cons, 1);
}

static inline __attribute__((always_inline))

@@ -109,8 +129,6 @@ void poll_rpc_msg(struct protocol_stack *stack, uint32_t max_num)
stack->stats.call_null++;
}

- rte_mb();
-
if (msg->self_release) {
pthread_spin_unlock(&msg->lock);
} else {
@@ -192,16 +210,6 @@ int32_t rpc_call_thread_regphase2(struct protocol_stack *stack, void *conn)
return rpc_sync_call(&stack->rpc_queue, msg);
}

-int32_t rpc_call_eventlistcnt(struct protocol_stack *stack)
-{
- struct rpc_msg *msg = rpc_msg_alloc(stack, stack_eventlist_count);
- if (msg == NULL) {
- return -1;
- }
-
- return rpc_sync_call(&stack->rpc_queue, msg);
-}
-
int32_t rpc_call_sendlistcnt(struct protocol_stack *stack)
{
struct rpc_msg *msg = rpc_msg_alloc(stack, stack_sendlist_count);
@@ -222,28 +230,6 @@ int32_t rpc_call_recvlistcnt(struct protocol_stack *stack)
return rpc_sync_call(&stack->rpc_queue, msg);
}

-void add_epoll_event(struct netconn *conn, uint32_t event);
-static void rpc_add_event(struct rpc_msg *msg)
-{
- struct lwip_sock *sock = (struct lwip_sock *)msg->args[MSG_ARG_0].p;
- if (sock->conn) {
- add_epoll_event(sock->conn, sock->events);
- }
-}
-
-void rpc_call_addevent(struct protocol_stack *stack, void *sock)
-{
- struct rpc_msg *msg = rpc_msg_alloc(stack, rpc_add_event);
- if (msg == NULL) {
- return;
- }
-
- msg->args[MSG_ARG_0].p = sock;
-
- msg->self_release = 0;
- rpc_call(&stack->rpc_queue, msg);
-}
-
int32_t rpc_call_arp(struct protocol_stack *stack, struct rte_mbuf *mbuf)
{
struct rpc_msg *msg = rpc_msg_alloc(stack, stack_arp);
@@ -260,7 +246,7 @@ int32_t rpc_call_arp(struct protocol_stack *stack, struct rte_mbuf *mbuf)

int32_t rpc_call_socket(int32_t domain, int32_t type, int32_t protocol)
{
- struct protocol_stack *stack = get_minconn_protocol_stack();
+ struct protocol_stack *stack = get_bind_protocol_stack();
struct rpc_msg *msg = rpc_msg_alloc(stack, stack_socket);
if (msg == NULL) {
return -1;
@@ -342,7 +328,12 @@ int32_t rpc_call_connect(int fd, const struct sockaddr *addr, socklen_t addrlen)
msg->args[MSG_ARG_1].cp = addr;
msg->args[MSG_ARG_2].socklen = addrlen;

- return rpc_sync_call(&stack->rpc_queue, msg);
+ int32_t ret = rpc_sync_call(&stack->rpc_queue, msg);
+ if (ret < 0) {
+ errno = -ret;
+ return -1;
+ }
+ return ret;
}

int32_t rpc_call_getpeername(int fd, struct sockaddr *addr, socklen_t *addrlen)
diff --git a/src/lstack/include/lstack_cfg.h b/src/lstack/include/lstack_cfg.h
index 987828d..aeffbb3 100644
--- a/src/lstack/include/lstack_cfg.h
+++ b/src/lstack/include/lstack_cfg.h
@@ -75,6 +75,7 @@ struct cfg_params {
uint32_t lpm_pkts_in_detect;
bool use_ltran; // ture:lstack read from nic false:read form ltran
bool kni_switch;
+ bool listen_shadow; // true: listen in all stack threads. false: listen in one stack thread.
int dpdk_argc;
char **dpdk_argv;
struct secondary_attach_arg sec_attach_arg;
diff --git a/src/lstack/include/lstack_dpdk.h b/src/lstack/include/lstack_dpdk.h
index bb9be21..6ffcc41 100644
--- a/src/lstack/include/lstack_dpdk.h
+++ b/src/lstack/include/lstack_dpdk.h
@@ -23,7 +23,7 @@
#include "dpdk_common.h"
struct protocol_stack;

-#define RX_NB_MBUF ((5 * (MAX_CLIENTS / 4)) + (VDEV_RX_QUEUE_SZ * DEFAULT_BACKUP_RING_SIZE_FACTOR))
+#define RX_NB_MBUF ((5 * MAX_CLIENTS) + (VDEV_RX_QUEUE_SZ * DEFAULT_BACKUP_RING_SIZE_FACTOR))
#define RX_MBUF_CACHE_SZ (VDEV_RX_QUEUE_SZ)
#define TX_NB_MBUF (128 * DEFAULT_RING_SIZE)
#define TX_MBUF_CACHE_SZ (DEFAULT_RING_SIZE)
@@ -34,13 +34,13 @@ struct protocol_stack;

#define MAX_PACKET_SZ 2048

+#define RING_SIZE(x) ((x) - 1)

#define MBUF_SZ (MAX_PACKET_SZ + RTE_PKTMBUF_HEADROOM)

#define MAX_CORE_NUM 256
#define CALL_MSG_RING_SIZE (unsigned long long)32
#define CALL_CACHE_SZ 0
-#define CALL_POOL_SZ 128

/* Layout:
* | rte_mbuf | pbuf | custom_free_function | payload |
@@ -62,7 +62,6 @@ int32_t dpdk_eal_init(void);
int32_t pktmbuf_pool_init(struct protocol_stack *stack, uint16_t stack_num);
struct rte_ring *create_ring(const char *name, uint32_t count, uint32_t flags, int32_t queue_id);
int32_t create_shared_ring(struct protocol_stack *stack);
-struct rte_mempool *create_rpc_mempool(const char *name, uint16_t queue_id);
void lstack_log_level_init(void);
int dpdk_ethdev_init(void);
int dpdk_ethdev_start(void);
diff --git a/src/lstack/include/lstack_lockless_queue.h b/src/lstack/include/lstack_lockless_queue.h
index c00d3a2..c70b56a 100644
--- a/src/lstack/include/lstack_lockless_queue.h
+++ b/src/lstack/include/lstack_lockless_queue.h
@@ -1,5 +1,13 @@
/*
-* Copyright (c) 2010-2011 Dmitry Vyukov. All rights reserved.
+* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+* gazelle is licensed under the Mulan PSL v2.
+* You can use this software according to the terms and conditions of the Mulan PSL v2.
+* You may obtain a copy of Mulan PSL v2 at:
+* http://license.coscl.org.cn/MulanPSL2
+* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+* PURPOSE.
+* See the Mulan PSL v2 for more details.
*/

#ifndef __GAZELLE_LOCKLESS_QUEUE_H__
diff --git a/src/lstack/include/lstack_lwip.h b/src/lstack/include/lstack_lwip.h
index c73e3a7..ba57541 100644
--- a/src/lstack/include/lstack_lwip.h
+++ b/src/lstack/include/lstack_lwip.h
@@ -14,18 +14,16 @@
#define __GAZELLE_LWIP_H__

#include "lstack_thread_rpc.h"
+#include "dpdk_common.h"
#include "lwipsock.h"

-#define SOCK_RECV_RING_SIZE (128)
-#define SOCK_SEND_RING_SIZE (32)

#define NETCONN_IS_ACCEPTIN(sock) (((sock)->conn->acceptmbox != NULL) && !sys_mbox_empty((sock)->conn->acceptmbox))
-#define NETCONN_IS_DATAIN(sock) ((rte_ring_count((sock)->recv_ring) || (sock)->recv_lastdata))
-#define NETCONN_IS_DATAOUT(sock) ((rte_ring_count((sock)->send_ring) || (sock)->send_lastdata))
-#define NETCONN_IS_OUTIDLE(sock) rte_ring_free_count((sock)->send_ring)
+#define NETCONN_IS_DATAIN(sock) ((gazelle_ring_readable_count((sock)->recv_ring) || (sock)->recv_lastdata))
+#define NETCONN_IS_DATAOUT(sock) gazelle_ring_readover_count((sock)->send_ring)
+#define NETCONN_IS_OUTIDLE(sock) gazelle_ring_readable_count((sock)->send_ring)

void create_shadow_fd(struct rpc_msg *msg);
-void listen_list_add_node(int32_t head_fd, int32_t add_fd);
void gazelle_init_sock(int32_t fd);
int32_t gazelle_socket(int domain, int type, int protocol);
void gazelle_clean_sock(int32_t fd);
@@ -37,7 +35,6 @@ void read_recv_list(struct protocol_stack *stack, uint32_t max_num);
void send_stack_list(struct protocol_stack *stack, uint32_t send_max);
void add_recv_list(int32_t fd);
void stack_sendlist_count(struct rpc_msg *msg);
-void stack_eventlist_count(struct rpc_msg *msg);
void get_lwip_conntable(struct rpc_msg *msg);
void get_lwip_connnum(struct rpc_msg *msg);
void stack_recvlist_count(struct rpc_msg *msg);
diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h
index bc4e4bd..8a6aa9d 100644
--- a/src/lstack/include/lstack_protocol_stack.h
+++ b/src/lstack/include/lstack_protocol_stack.h
@@ -14,48 +14,52 @@
#define __GAZELLE_PROTOCOL_STACK_H__

#include <semaphore.h>
-#include <pthread.h>
#include <lwip/list.h>
#include <lwip/netif.h>
+#include <rte_common.h>
#include "dpdk_common.h"
#include "lstack_thread_rpc.h"
#include "gazelle_dfx_msg.h"
#include "lstack_lockless_queue.h"

+#define SOCK_RECV_RING_SIZE (128)
+#define SOCK_RECV_FREE_THRES (32)
+#define SOCK_SEND_RING_SIZE (32)
+#define SOCK_SEND_REPLENISH_THRES (16)
+#define WAKEUP_MAX_NUM (32)
+
struct protocol_stack {
uint32_t tid;
uint16_t queue_id;
uint16_t port_id;
uint16_t socket_id;
uint16_t cpu_id;
- volatile uint16_t conn_num;
cpu_set_t idle_cpuset; /* idle cpu in numa of stack, app thread bind to it */
+ int32_t epollfd; /* kernel event thread epoll fd */

- lockless_queue rpc_queue;
struct rte_mempool *rx_pktmbuf_pool;
struct rte_mempool *tx_pktmbuf_pool;
struct rte_ring *rx_ring;
struct rte_ring *tx_ring;
struct rte_ring *reg_ring;
struct rte_ring *wakeup_ring;
-
struct reg_ring_msg *reg_buf;

+ volatile uint16_t conn_num __rte_cache_aligned;
+ lockless_queue rpc_queue __rte_cache_aligned;
+ char pad __rte_cache_aligned;
+
struct netif netif;
+ struct eth_dev_ops *dev_ops;
uint32_t rx_ring_used;
uint32_t tx_ring_used;
- struct eth_dev_ops *dev_ops;

struct list_node recv_list;
- struct list_node listen_list;
struct list_node send_list;
- struct list_node event_list;
- pthread_spinlock_t event_lock;
- int32_t epollfd; /* kernel event thread epoll fd */

- struct gazelle_stat_pkts stats;
- struct gazelle_stack_latency latency;
struct stats_ *lwip_stats;
+ struct gazelle_stack_latency latency;
+ struct gazelle_stack_stat stats __rte_cache_aligned;
};

struct eth_params;
@@ -74,12 +78,14 @@ struct protocol_stack_group {
/* dfx stats */
bool latency_start;
uint64_t call_alloc_fail;
+ pthread_spinlock_t wakeup_list_lock;
+ struct wakeup_poll *wakeup_list __rte_cache_aligned;
};

long get_stack_tid(void);
struct protocol_stack *get_protocol_stack(void);
struct protocol_stack *get_protocol_stack_by_fd(int32_t fd);
-struct protocol_stack *get_minconn_protocol_stack(void);
+struct protocol_stack *get_bind_protocol_stack(void);
struct protocol_stack_group *get_protocol_stack_group(void);

int32_t init_protocol_stack(void);
@@ -96,6 +102,7 @@ int32_t stack_broadcast_close(int32_t fd);

/* listen sync to all protocol stack thread, so that any protocol stack thread can build connect */
int32_t stack_broadcast_listen(int32_t fd, int backlog);
+int32_t stack_single_listen(int32_t fd, int32_t backlog);

/* ergodic the protocol stack thread to find the connection, because all threads are listening */
int32_t stack_broadcast_accept(int32_t fd, struct sockaddr *addr, socklen_t *addrlen);
diff --git a/src/lstack/include/lstack_stack_stat.h b/src/lstack/include/lstack_stack_stat.h
index 2c3bf8f..e152fe6 100644
--- a/src/lstack/include/lstack_stack_stat.h
+++ b/src/lstack/include/lstack_stack_stat.h
@@ -24,4 +24,7 @@ void stack_stat_init(void);
int32_t handle_stack_cmd(int fd, enum GAZELLE_STAT_MODE stat_mode);
uint64_t get_current_time(void);
void lstack_get_low_power_info(struct gazelle_stat_low_power_info *low_power_info);
+void register_wakeup(struct wakeup_poll *wakeup);
+void unregister_wakeup(struct wakeup_poll *wakeup);
+
#endif /* GAZELLE_STACK_STAT_H */
diff --git a/src/lstack/include/lstack_thread_rpc.h b/src/lstack/include/lstack_thread_rpc.h
index 61bcd38..35e6b1e 100644
--- a/src/lstack/include/lstack_thread_rpc.h
+++ b/src/lstack/include/lstack_thread_rpc.h
@@ -37,12 +37,13 @@ union rpc_msg_arg {
socklen_t socklen;
size_t size;
};
+struct rpc_msg_pool;
struct rpc_msg {
pthread_spinlock_t lock; /* msg handler unlock notice sender msg process done */
int32_t self_release; /* 0:msg handler release msg 1:msg sender release msg */
int64_t result; /* func return val */
lockless_queue_node queue_node;
- struct rte_mempool *pool;
+ struct rpc_msg_pool *pool;

rpc_msg_func func; /* msg handle func hook */
union rpc_msg_arg args[RPM_MSG_ARG_SIZE]; /* resolve by type */
@@ -50,11 +51,9 @@

struct protocol_stack;
void poll_rpc_msg(struct protocol_stack *stack, uint32_t max_num);
-void rpc_call_addevent(struct protocol_stack *stack, void *sock);
int32_t rpc_call_msgcnt(struct protocol_stack *stack);
int32_t rpc_call_shadow_fd(struct protocol_stack *stack, int32_t fd, const struct sockaddr *addr, socklen_t addrlen);
int32_t rpc_call_recvlistcnt(struct protocol_stack *stack);
-int32_t rpc_call_eventlistcnt(struct protocol_stack *stack);
int32_t rpc_call_sendlistcnt(struct protocol_stack *stack);
int32_t rpc_call_thread_regphase1(struct protocol_stack *stack, void *conn);
int32_t rpc_call_thread_regphase2(struct protocol_stack *stack, void *conn);
diff --git a/src/lstack/include/posix/lstack_epoll.h b/src/lstack/include/posix/lstack_epoll.h
index a83f41f..e9f9b91 100644
--- a/src/lstack/include/posix/lstack_epoll.h
+++ b/src/lstack/include/posix/lstack_epoll.h
@@ -20,16 +20,27 @@ extern "C" {
#include <poll.h>
#include <stdbool.h>
#include <semaphore.h>
+#include <pthread.h>
+#include <rte_common.h>

#include "lstack_protocol_stack.h"

+enum wakeup_type {
+ WAKEUP_EPOLL = 0,
+ WAKEUP_POLL,
+};
struct wakeup_poll {
+ /* stack thread read frequently */
+ sem_t event_sem __rte_cache_aligned;
+ enum wakeup_type type __rte_cache_aligned;
+ volatile bool have_kernel_event __rte_cache_aligned;
+ struct gazelle_wakeup_stat stat __rte_cache_aligned;
+ char pad __rte_cache_aligned;
+
bool init;
struct protocol_stack *bind_stack;
- sem_t event_sem;
-
- int32_t epollfd;
- bool have_kernel_fd;
+ int32_t epollfd; /* epoll kernel fd, ctl add into gazelle_kernel_event thread */
+ struct wakeup_poll *next;

/* poll */
struct pollfd *last_fds;
@@ -40,7 +51,8 @@ struct wakeup_poll {
/* epoll */
int32_t stack_fd_cnt[PROTOCOL_STACK_MAX];
struct protocol_stack *max_stack;
- struct list_node event_list; /* epoll temp use */
+ struct list_node event_list;
+ pthread_spinlock_t event_list_lock;
};
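The reworked layout above pushes the fields the stack thread writes (event_sem, type, have_kernel_event, stat) onto their own cache lines, away from the fields only the app thread touches. A reduced sketch of the idea with __rte_cache_aligned:

    #include <rte_common.h>
    #include <semaphore.h>
    #include <stdint.h>

    /* Writers land on different cache lines, so a sem_post() by the stack
     * thread does not invalidate the line the app thread is updating. */
    struct split_hot_fields {
        sem_t event_sem __rte_cache_aligned;       /* stack thread posts */
        uint64_t app_read_cnt __rte_cache_aligned; /* app thread updates */
        char pad __rte_cache_aligned;              /* seal the last line */
    };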

int32_t lstack_epoll_create(int32_t size);
diff --git a/src/lstack/lstack.conf b/src/lstack/lstack.conf
index 696dfb9..fdca602 100644
--- a/src/lstack/lstack.conf
+++ b/src/lstack/lstack.conf
@@ -16,6 +16,7 @@ kni_switch=0
low_power_mode=0

num_cpus="2"
+num_wakeup="3"

host_addr="192.168.1.10"
mask_addr="255.255.255.0"
diff --git a/src/lstack/netif/lstack_ethdev.c b/src/lstack/netif/lstack_ethdev.c
index 382f3bc..7938520 100644
--- a/src/lstack/netif/lstack_ethdev.c
+++ b/src/lstack/netif/lstack_ethdev.c
@@ -39,10 +39,11 @@ void eth_dev_recv(struct rte_mbuf *mbuf)
struct pbuf_custom *pc = NULL;
struct protocol_stack *stack = get_protocol_stack();
struct rte_mbuf *m = mbuf;
- uint16_t len;
+ uint16_t len, pkt_len;

+ pkt_len = (uint16_t)rte_pktmbuf_pkt_len(m);
while (m != NULL) {
- len = (uint16_t)rte_pktmbuf_pkt_len(m);
+ len = (uint16_t)rte_pktmbuf_data_len(m);
payload = rte_pktmbuf_mtod(m, void *);
pc = mbuf_to_pbuf(m);
pc->custom_free_function = gazelle_free_pbuf;
@@ -51,6 +52,7 @@ void eth_dev_recv(struct rte_mbuf *mbuf)
stack->stats.rx_allocmbuf_fail++;
break;
}
+ next->tot_len = pkt_len;
#if CHECKSUM_CHECK_IP_HW || CHECKSUM_CHECK_TCP_HW
next->ol_flags = m->ol_flags;
#endif
@@ -71,7 +73,6 @@ void eth_dev_recv(struct rte_mbuf *mbuf)
if (ret != ERR_OK) {
LSTACK_LOG(ERR, LSTACK, "eth_dev_recv: failed to handle rx pbuf ret=%d\n", ret);
stack->stats.rx_drop++;
- pbuf_free(head);
}
}
}
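Note the tot_len handling in this hunk: standard lwip keeps tot_len == len + next->tot_len, while here every segment of a chained rx mbuf is tagged with the full packet length, so receive-side code can learn the packet size from any segment. A sketch of the resulting chain shape, inferred from the hunk above rather than from lwip's usual invariant:

    static void tag_chain_with_pkt_len(struct pbuf *head, uint16_t pkt_len)
    {
        for (struct pbuf *p = head; p != NULL; p = p->next) {
            p->tot_len = pkt_len;   /* every segment carries the packet length */
        }
    }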
@@ -181,7 +182,7 @@ int32_t ethdev_init(struct protocol_stack *stack)

if (use_ltran()) {
stack->rx_ring_used = 0;
- int32_t ret = fill_mbuf_to_ring(stack->rx_pktmbuf_pool, stack->rx_ring, VDEV_RX_QUEUE_SZ - 1);
+ int32_t ret = fill_mbuf_to_ring(stack->rx_pktmbuf_pool, stack->rx_ring, RING_SIZE(VDEV_RX_QUEUE_SZ));
if (ret != 0) {
LSTACK_LOG(ERR, LSTACK, "fill mbuf to rx_ring failed ret=%d\n", ret);
return ret;
diff --git a/src/lstack/netif/lstack_vdev.c b/src/lstack/netif/lstack_vdev.c
index 5a4e86a..287ac8f 100644
--- a/src/lstack/netif/lstack_vdev.c
+++ b/src/lstack/netif/lstack_vdev.c
@@ -42,14 +42,14 @@ static uint32_t ltran_rx_poll(struct protocol_stack *stack, struct rte_mbuf **pk
uint32_t nr_pkts;
struct rte_mbuf *free_buf[DPDK_PKT_BURST_SIZE];

- rcvd_pkts = rte_ring_en_dequeue_burst(stack->rx_ring, (void **)pkts, max_mbuf);
+ rcvd_pkts = gazelle_ring_sc_dequeue(stack->rx_ring, (void **)pkts, max_mbuf);

stack->rx_ring_used += rcvd_pkts;
if (unlikely(stack->rx_ring_used >= USED_RX_PKTS_WATERMARK)) {
- uint32_t free_cnt = LWIP_MIN(stack->rx_ring_used, DPDK_PKT_BURST_SIZE);
+ uint32_t free_cnt = LWIP_MIN(stack->rx_ring_used, RING_SIZE(DPDK_PKT_BURST_SIZE));
int32_t ret = gazelle_alloc_pktmbuf(stack->rx_pktmbuf_pool, (struct rte_mbuf **)free_buf, free_cnt);
if (likely(ret == 0)) {
- nr_pkts = rte_ring_en_enqueue_bulk(stack->rx_ring, (void **)free_buf, free_cnt);
+ nr_pkts = gazelle_ring_sp_enqueue(stack->rx_ring, (void **)free_buf, free_cnt);
stack->rx_ring_used -= nr_pkts;
} else {
stack->stats.rx_allocmbuf_fail++;
@@ -72,14 +72,14 @@ static uint32_t ltran_tx_xmit(struct protocol_stack *stack, struct rte_mbuf **pk

do {
if (unlikely(stack->tx_ring_used >= INUSE_TX_PKTS_WATERMARK)) {
- uint32_t free_pkts = rte_ring_en_dequeue_burst(stack->tx_ring, (void **)free_buf, stack->tx_ring_used);
+ uint32_t free_pkts = gazelle_ring_sc_dequeue(stack->tx_ring, (void **)free_buf, stack->tx_ring_used);
for (uint32_t i = 0; i < free_pkts; i++) {
rte_pktmbuf_free(free_buf[i]);
}
stack->tx_ring_used -= free_pkts;
}

- sent_pkts += rte_ring_en_enqueue_bulk(stack->tx_ring, (void **)(&pkts[sent_pkts]), nr_pkts - sent_pkts);
+ sent_pkts += gazelle_ring_sp_enqueue(stack->tx_ring, (void **)(&pkts[sent_pkts]), nr_pkts - sent_pkts);
} while ((sent_pkts < nr_pkts) && (ENQUEUE_RING_RETRY_TIMEOUT > sys_now() - tbegin) && get_register_state());

stack->tx_ring_used += sent_pkts;
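These rings are used strictly single-producer/single-consumer: one side enqueues with gazelle_ring_sp_enqueue() and the same side later reclaims its recycled buffers with gazelle_ring_sc_dequeue(), which is what lets the gazelle rings drop the CAS loops of the generic rte_ring. A minimal round trip under that assumption (buffer values are placeholders):

    static void ring_roundtrip_example(struct rte_ring *r)
    {
        void *in[4] = { (void *)0x1, (void *)0x2, (void *)0x3, (void *)0x4 };
        void *out[4];

        uint32_t n = gazelle_ring_sp_enqueue(r, in, 4);   /* producer side only */
        uint32_t m = gazelle_ring_sc_dequeue(r, out, n);  /* consumer side only */
        (void)m;
    }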
@@ -128,7 +128,7 @@ int32_t vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple)
}

do {
- (void)rte_ring_en_dequeue_burst(stack->reg_ring, free_buf, VDEV_REG_QUEUE_SZ);
+ (void)gazelle_ring_sc_dequeue(stack->reg_ring, free_buf, VDEV_REG_QUEUE_SZ);

if (get_reg_ring_free_count(stack->reg_ring) == 0) {
continue;
@@ -144,7 +144,7 @@ int32_t vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple)
}

free_buf[0] = tmp_buf;
- sent_pkts = rte_ring_en_enqueue_bulk(stack->reg_ring, free_buf, 1);
+ sent_pkts = gazelle_ring_sp_enqueue(stack->reg_ring, free_buf, 1);
} while ((sent_pkts < 1) && (ENQUEUE_RING_RETRY_TIMEOUT > sys_now() - tbegin) && get_register_state());

if (sent_pkts == 1) {
diff --git a/src/ltran/ltran_dfx.c b/src/ltran/ltran_dfx.c
index 8d71966..7db1adc 100644
--- a/src/ltran/ltran_dfx.c
+++ b/src/ltran/ltran_dfx.c
@@ -546,32 +546,28 @@ static void gazelle_print_lstack_stat_brief(struct gazelle_stat_lstack_total *st
static void show_lstack_stats(struct gazelle_stack_dfx_data *lstack_stat)
{
printf("\n------ stack tid: %6u ------\n", lstack_stat->tid);
- printf("rx_pkts: %-20"PRIu64" ", lstack_stat->data.pkts.rx);
- printf("rx_drop: %-20"PRIu64" ", lstack_stat->data.pkts.rx_drop);
- printf("rx_allocmbuf_fail: %-10"PRIu64"\n", lstack_stat->data.pkts.rx_allocmbuf_fail);
- printf("tx_pkts: %-20"PRIu64" ", lstack_stat->data.pkts.tx);
- printf("tx_drop: %-20"PRIu64" ", lstack_stat->data.pkts.tx_drop);
- printf("tx_allocmbuf_fail: %-10"PRIu64"\n", lstack_stat->data.pkts.tx_allocmbuf_fail);
- printf("app_read: %-19"PRIu64" ", lstack_stat->data.pkts.app_read_cnt);
- printf("read_lwip: %-18"PRIu64" ", lstack_stat->data.pkts.read_lwip_cnt);
- printf("read_lwip_drop: %-13"PRIu64" \n", lstack_stat->data.pkts.read_lwip_drop);
- printf("app_write: %-18"PRIu64" ", lstack_stat->data.pkts.app_write_cnt);
- printf("write_lwip: %-17"PRIu64" ", lstack_stat->data.pkts.write_lwip_cnt);
- printf("app_get_idlefail: %-11"PRIu64" \n", lstack_stat->data.pkts.app_write_idlefail);
- printf("app_write_drop: %-13"PRIu64" ", lstack_stat->data.pkts.app_write_drop);
- printf("write_lwip_drop: %-12"PRIu64" ", lstack_stat->data.pkts.write_lwip_drop);
- printf("app_write_idlebuf: %-10"PRIu16" \n", lstack_stat->data.pkts.send_idle_ring_cnt);
- printf("event_list: %-17"PRIu64" ", lstack_stat->data.pkts.event_list);
- printf("recv_list: %-18"PRIu64" ", lstack_stat->data.pkts.recv_list);
+ printf("rx_pkts: %-20"PRIu64" ", lstack_stat->data.pkts.stack_stat.rx);
+ printf("rx_drop: %-20"PRIu64" ", lstack_stat->data.pkts.stack_stat.rx_drop);
+ printf("rx_allocmbuf_fail: %-10"PRIu64"\n", lstack_stat->data.pkts.stack_stat.rx_allocmbuf_fail);
+ printf("tx_pkts: %-20"PRIu64" ", lstack_stat->data.pkts.stack_stat.tx);
+ printf("tx_drop: %-20"PRIu64" ", lstack_stat->data.pkts.stack_stat.tx_drop);
+ printf("tx_allocmbuf_fail: %-10"PRIu64"\n", lstack_stat->data.pkts.stack_stat.tx_allocmbuf_fail);
+ printf("app_read: %-19"PRIu64" ", lstack_stat->data.pkts.wakeup_stat.app_read_cnt);
+ printf("read_lwip: %-18"PRIu64" ", lstack_stat->data.pkts.stack_stat.read_lwip_cnt);
+ printf("read_lwip_drop: %-13"PRIu64" \n", lstack_stat->data.pkts.stack_stat.read_lwip_drop);
+ printf("app_write: %-18"PRIu64" ", lstack_stat->data.pkts.wakeup_stat.app_write_cnt);
+ printf("write_lwip: %-17"PRIu64" ", lstack_stat->data.pkts.stack_stat.write_lwip_cnt);
+ printf("app_get_idlefail: %-11"PRIu64" \n", lstack_stat->data.pkts.wakeup_stat.app_write_idlefail);
+ printf("recv_list: %-18"PRIu64" ", lstack_stat->data.pkts.recv_list_cnt);
+ printf("send_list: %-18"PRIu64" ", lstack_stat->data.pkts.send_list_cnt);
printf("conn_num: %-19"PRIu16" \n", lstack_stat->data.pkts.conn_num);
- printf("wakeup_events: %-14"PRIu64" ", lstack_stat->data.pkts.wakeup_events);
- printf("app_events: %-17"PRIu64" ", lstack_stat->data.pkts.app_events);
- printf("read_null: %-18"PRIu64" \n", lstack_stat->data.pkts.read_null);
+ printf("wakeup_events: %-14"PRIu64" ", lstack_stat->data.pkts.stack_stat.wakeup_events);
+ printf("app_events: %-17"PRIu64" ", lstack_stat->data.pkts.wakeup_stat.app_events);
+ printf("read_null: %-18"PRIu64" \n", lstack_stat->data.pkts.wakeup_stat.read_null);
printf("call_msg: %-19"PRIu64" ", lstack_stat->data.pkts.call_msg_cnt);
printf("call_alloc_fail: %-12"PRIu64" ", lstack_stat->data.pkts.call_alloc_fail);
- printf("call_null: %-18"PRIu64" \n", lstack_stat->data.pkts.call_null);
- printf("send_self_rpc: %-14"PRIu64" ", lstack_stat->data.pkts.send_self_rpc);
- printf("send_list: %-18"PRIu64" \n", lstack_stat->data.pkts.send_list);
+ printf("call_null: %-18"PRIu64" \n", lstack_stat->data.pkts.stack_stat.call_null);
+ printf("send_self_rpc: %-14"PRIu64" \n", lstack_stat->data.pkts.stack_stat.send_self_rpc);
}

static void gazelle_print_lstack_stat_detail(struct gazelle_stack_dfx_data *lstack_stat,
@@ -873,8 +869,8 @@ static void gazelle_print_lstack_stat_conn(void *buf, const struct gazelle_stat_
printf("Active Internet connections (servers and established)\n");
do {
printf("\n------ stack tid: %6u ------\n", stat->tid);
- printf("No. Proto recv_cnt recv_ring in_send send_ring sem_cnt Local Address "
- " Foreign Address State\n");
+ printf("No. Proto recv_cnt recv_ring in_send send_ring sem_cnt fd Local Address "
+ " Foreign Address State\n");
uint32_t unread_pkts = 0;
uint32_t unsend_pkts = 0;
for (i = 0; i < conn->conn_num && i < GAZELLE_LSTACK_MAX_CONN; i++) {
@@ -883,13 +879,13 @@ static void gazelle_print_lstack_stat_conn(void *buf, const struct gazelle_stat_
rip.s_addr = conn_info->rip;
lip.s_addr = conn_info->lip;
if ((conn_info->state == GAZELLE_ACTIVE_LIST) || (conn_info->state == GAZELLE_TIME_WAIT_LIST)) {
- printf("%-6utcp %-10u%-11u%-9u%-11u%-9d%s:%hu\t%s:%hu\t%s\n", i, conn_info->recv_cnt,
+ printf("%-6utcp %-10u%-11u%-9u%-11u%-9d%-7d%s:%hu\t %s:%hu\t %s\n", i, conn_info->recv_cnt,
conn_info->recv_ring_cnt, conn_info->in_send, conn_info->send_ring_cnt, conn_info->sem_cnt,
- inet_ntop(AF_INET, &lip, str_ip, sizeof(str_ip)), conn_info->l_port,
+ conn_info->fd, inet_ntop(AF_INET, &lip, str_ip, sizeof(str_ip)), conn_info->l_port,
inet_ntop(AF_INET, &rip, str_rip, sizeof(str_rip)), conn_info->r_port,
tcp_state_to_str(conn_info->tcp_sub_state));
} else if (conn_info->state == GAZELLE_LISTEN_LIST) {
- printf("%-6utcp %-50u%s:%hu\t0.0.0.0:*\t\tLISTEN\n", i, conn_info->recv_cnt,
+ printf("%-6utcp %-57u%s:%hu\t 0.0.0.0:*\t\t LISTEN\n", i, conn_info->recv_cnt,
inet_ntop(AF_INET, &lip, str_ip, sizeof(str_ip)), conn_info->l_port);
} else {
printf("Got unknow tcp conn::%s:%5hu, state:%u\n",
diff --git a/src/ltran/ltran_forward.c b/src/ltran/ltran_forward.c
index b264ad3..776692d 100644
--- a/src/ltran/ltran_forward.c
+++ b/src/ltran/ltran_forward.c
@@ -92,7 +92,7 @@ static __rte_always_inline void backup_bufs_enque_rx_ring(struct gazelle_stack *
struct rte_mbuf *free_buf[RING_MAX_SIZE];

flush_cnt = (stack->backup_pkt_cnt < RING_MAX_SIZE) ? stack->backup_pkt_cnt : RING_MAX_SIZE;
- free_cnt = rte_ring_cn_dequeue_burst(stack->rx_ring, (void **)free_buf, flush_cnt);
+ free_cnt = gazelle_ring_read(stack->rx_ring, (void **)free_buf, flush_cnt);

for (uint32_t j = 0; j < free_cnt; j++) {
index = (stack->backup_start + j) % backup_size;
@@ -102,7 +102,7 @@ static __rte_always_inline void backup_bufs_enque_rx_ring(struct gazelle_stack *
stack->stack_stats.rx += free_cnt;
stack->backup_pkt_cnt -= free_cnt;
stack->backup_start = (stack->backup_start + free_cnt) % backup_size;
- rte_ring_cn_enqueue(stack->rx_ring);
+ gazelle_ring_read_over(stack->rx_ring);
}

static __rte_always_inline void pktbufs_move_to_backup_bufs(struct gazelle_stack *stack, struct rte_mbuf **mbuf,
@@ -135,7 +135,7 @@ static __rte_always_inline uint32_t pkt_bufs_enque_rx_ring(struct gazelle_stack
struct rte_mbuf **cl_buffer = stack->pkt_buf;
struct rte_mbuf *free_buf[GAZELLE_PACKET_READ_SIZE];

- free_cnt = rte_ring_cn_dequeue_burst(stack->rx_ring, (void **)free_buf, stack->pkt_cnt);
+ free_cnt = gazelle_ring_read(stack->rx_ring, (void **)free_buf, stack->pkt_cnt);
stack->stack_stats.rx += free_cnt;

/* this prefetch and copy code, only 50~60 instruction, but never spend less than 70 cycle.
@@ -187,7 +187,7 @@ static __rte_always_inline uint32_t pkt_bufs_enque_rx_ring(struct gazelle_stack
}

if (likely(free_cnt != 0)) {
- rte_ring_cn_enqueue(stack->rx_ring);
+ gazelle_ring_read_over(stack->rx_ring);
}

return free_cnt;
@@ -520,14 +520,14 @@ static __rte_always_inline void tcp_hash_table_handle(struct gazelle_stack *stac
return;
}

- uint32_t num = rte_ring_cn_dequeue_burst(stack->reg_ring, pkts, PACKET_READ_SIZE);
+ uint32_t num = gazelle_ring_read(stack->reg_ring, pkts, PACKET_READ_SIZE);

for (uint32_t i = 0; i < num; i++) {
tcp_hash_table_modify(stack, pkts[i]);
pkts[i] = NULL;
}

- rte_ring_cn_enqueue(stack->reg_ring);
+ gazelle_ring_read_over(stack->reg_ring);
if (pthread_mutex_unlock(&sock_htable->mlock) != 0) {
LTRAN_WARN("write tcp_htable: unlock failed, errno %d\n", errno);
}
@@ -675,7 +675,7 @@ static __rte_always_inline void downstream_forward_one(struct gazelle_stack *sta
uint32_t used_cnt;

struct rte_mbuf *used_pkts[GAZELLE_PACKET_READ_SIZE];
- used_cnt = rte_ring_cn_dequeue_burst(stack->tx_ring, (void **)used_pkts, GAZELLE_PACKET_READ_SIZE);
+ used_cnt = gazelle_ring_read(stack->tx_ring, (void **)used_pkts, GAZELLE_PACKET_READ_SIZE);
if (used_cnt == 0) {
return;
}
@@ -686,7 +686,7 @@ static __rte_always_inline void downstream_forward_one(struct gazelle_stack *sta
if (ret != 0) {
/* free pkts that not have be sent. */
LTRAN_ERR("down alloc error, rx_pkts:%u ret=%d.\n", used_cnt, ret);
- rte_ring_cn_enqueue(stack->tx_ring);
+ gazelle_ring_read_over(stack->tx_ring);
stack->stack_stats.tx_drop += used_cnt;
rte_exit(EXIT_FAILURE, "down alloc error\n");
}
@@ -696,7 +696,7 @@ static __rte_always_inline void downstream_forward_one(struct gazelle_stack *sta
tx_bytes += used_pkts[tx_pkts]->data_len;
stack->stack_stats.tx_bytes += used_pkts[tx_pkts]->data_len;
}
- rte_ring_cn_enqueue(stack->tx_ring);
+ gazelle_ring_read_over(stack->tx_ring);

/* send packets anyway. */
tx_pkts = 0;
diff --git a/src/ltran/ltran_stat.c b/src/ltran/ltran_stat.c
index 7080424..c6805a6 100644
--- a/src/ltran/ltran_stat.c
+++ b/src/ltran/ltran_stat.c
@@ -25,6 +25,7 @@
#include "gazelle_dfx_msg.h"
#include "ltran_timer.h"
#include "ltran_ethdev.h"
+#include "dpdk_common.h"
#include "ltran_forward.h"

static uint64_t g_start_time_stamp = 0;
@@ -32,25 +33,11 @@ static int32_t g_start_latency = GAZELLE_OFF;
volatile int32_t g_ltran_stop_flag = GAZELLE_FALSE;
static struct statistics g_statistics;

-static uint32_t get_rx_ring_count(const struct rte_ring *r)
-{
- return rte_ring_count(r);
-}
-
uint64_t get_start_time_stamp(void)
{
return g_start_time_stamp;
}

-static uint32_t get_tx_ring_count(const struct rte_ring *r)
-{
- uint32_t prod_tail = r->prod.tail;
- uint32_t cons_head = r->cons.head;
-
- uint32_t count = (cons_head - prod_tail) & r->mask;
- return (count > r->capacity) ? r->capacity : count;
-}
-
void set_start_latency_flag(int32_t flag)
{
struct gazelle_instance_mgr *instance_mgr = get_instance_mgr();
@@ -203,8 +190,8 @@ static int32_t gazelle_filling_lstack_stat_total(struct gazelle_stat_lstack_tota
stat->latency_pkts = stack->stack_stats.latency_pkts;
stat->latency_total = stack->stack_stats.latency_total;
stat->reg_ring_cnt = rte_ring_cn_count(stack->reg_ring);
- stat->rx_ring_cnt = get_rx_ring_count(stack->rx_ring);
- stat->tx_ring_cnt = get_tx_ring_count(stack->tx_ring);
+ stat->rx_ring_cnt = gazelle_ring_readover_count(stack->rx_ring);
+ stat->tx_ring_cnt = gazelle_ring_readable_count(stack->tx_ring);

return GAZELLE_OK;
}
--
2.23.0