switchless schedule policy and asynchronous calls

2022-11-10 21:36:10 +08:00 · 2022-11-10 21:36:10 +08:00 · 059dc1cc44
commit 059dc1cc44
parent db6a49558f
5 changed files with 2249 additions and 1 deletions
--- a/0050-switchless-schedule-policy.patch
+++ b/0050-switchless-schedule-policy.patch
@ -0,0 +1,559 @@
+From 5b5caa5bc583c4a7dec5e3050dae619fa00d8424 Mon Sep 17 00:00:00 2001
+From: modric <wangyu283@huawei.com>
+Date: Tue, 18 Oct 2022 21:54:45 +0800
+Subject: [PATCH 1/4] switchless schedule policy
+
+Signed-off-by: modric <wangyu283@huawei.com>
+---
+ inc/common_inc/bit_operation.h                |   9 ++
+ inc/common_inc/switchless_defs.h              |  49 ++++---
+ inc/host_inc/secgear_defs.h                   |   1 +
+ inc/host_inc/secgear_uswitchless.h            |  12 +-
+ .../gp/itrustee/itrustee_tswitchless.c        | 134 ++++++++++++++++--
+ src/host_src/gp/gp_enclave.c                  |  18 +--
+ src/host_src/gp/gp_enclave.h                  |   2 +
+ src/host_src/gp/gp_uswitchless.c              |  19 +--
+ src/host_src/gp/gp_uswitchless.h              |   7 +-
+ 9 files changed, 205 insertions(+), 46 deletions(-)
+
+diff --git a/inc/common_inc/bit_operation.h b/inc/common_inc/bit_operation.h
+index baa6a1b..ab20121 100644
+--- a/inc/common_inc/bit_operation.h
+++ b/inc/common_inc/bit_operation.h
+@@ -37,6 +37,15 @@ static inline uint32_t count_tailing_zeroes(uint64_t value)
+     return (uint32_t)__builtin_ctzll(value);
+ }
+ 
+/*
+ * Returns the number of 1-bits in value.
+ * The population count of 0 is well defined (it is 0), so zero is accepted and nothing is asserted.
+ */
+static inline uint32_t count_ones(uint64_t value)
+{
+    return (uint32_t)__builtin_popcountll(value);
+}
+
+ /*
+  * Returns the number of leading 0-bits in x, starting at the most significant bit position.
+  * If x is 0, the result is undefined.
+diff --git a/inc/common_inc/switchless_defs.h b/inc/common_inc/switchless_defs.h
+index c7e9dfc..84629c3 100644
+--- a/inc/common_inc/switchless_defs.h
+++ b/inc/common_inc/switchless_defs.h
+@@ -16,28 +16,45 @@
+ #include <stdint.h>
+ #include <stdbool.h>
+ 
+#include "secgear_uswitchless.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+ 
+ #define SWITCHLESS_BITS_IN_QWORD 64
+ 
+-typedef struct {
+-    uint32_t num_uworkers;  // number of untrusted (for ocalls) worker threads
+-    uint32_t num_tworkers; // number of trusted (for ecalls) worker threads
+-    uint32_t call_pool_size_qwords; // number of switchless calls pool size (actual number is x 64)
+-    uint32_t num_max_params; // max number of parameters
+-} sl_task_pool_config_t;
+/*
+ *                        sl_task_pool_t      free_bit_buf
+ *                        |                   |
+ *                        v                   v
+ *                        +-------------------+-+-+--------+-+----------------+-+
+ *                  +---- | task_buf          | | |        | |                | |
+ *                  | +-- | pool_buf          |0|1|  ...   |1|       ...      |1|   normal memory
+ *                  | |   +-------------------+-+-+--------+-+----------------+-+
+ *                  | |
+ *                  | |                       signal_bit_buf
+ *                  | |                       |
+ *                  | |                       v
+ *                  | +-> +-------------------+-+-+--------+-+----------------+-+
+ *                  |     |                   | | |        | |                | |
+ *                  |     | cc_sl_config_t    |1|0|  ...   |0|       ...      |0|
+ *                  +---> +--------+---------++-+-+---+----+-+--+--------+----+-+
+ *                task[0] | status | func id | retval | param1  | param2 | ...  |   shared memory
+ *                        +--------+---------+--------+---------+--------+------+
+ *                task[n] |                          ...                        |
+ *                        +-----------------------------------------------------+
+ */
+ 
+ typedef struct {
+-    char *pool_buf; // switchless task pool control area
+    char *pool_buf; // switchless task pool control area, includes configuration area, signal bit area, and task area
+     char *task_buf; // part of pool_buf, stores invoking tasks
+-    uint64_t *free_bit_buf; // idle task flag
+-    uint64_t *signal_bit_buf; // to-be-processed task flag
+-    uint32_t bit_buf_size; // size of each task flag area
+-    uint32_t per_task_size; // size of each task
+    uint64_t *free_bit_buf; // length is bit_buf_size, the task indicated by the bit subscript is idle
+    uint64_t *signal_bit_buf; // length is bit_buf_size, the task indicated by the bit subscript is to be processed
+    uint32_t bit_buf_size; // size of each bit buf in bytes, determined by sl_call_pool_size_qwords in cc_sl_config_t
+    uint32_t per_task_size; // size of each task in bytes, for details, see task[0]
+     volatile bool need_stop_tworkers; // indicates whether to stop the trusted proxy thread
+-    sl_task_pool_config_t pool_cfg;
+    cc_sl_config_t pool_cfg;
+ } sl_task_pool_t;
+ 
+ typedef struct {
+@@ -65,12 +82,12 @@ typedef enum {
+  * Return:
+  *     pool size in bytes
+  */
+-inline size_t sl_get_pool_buf_len_by_config(sl_task_pool_config_t *pool_cfg)
+inline size_t sl_get_pool_buf_len_by_config(cc_sl_config_t *pool_cfg)
+ {
+-    size_t signal_bit_buf_size = pool_cfg->call_pool_size_qwords * sizeof(uint64_t);
+    size_t signal_bit_buf_size = pool_cfg->sl_call_pool_size_qwords * sizeof(uint64_t);
+     size_t each_task_size = SL_CALCULATE_PER_TASK_SIZE(pool_cfg);
+-    size_t task_buf_size = each_task_size * pool_cfg->call_pool_size_qwords * SWITCHLESS_BITS_IN_QWORD;
+-    return sizeof(sl_task_pool_config_t) + signal_bit_buf_size + task_buf_size;
+    size_t task_buf_size = each_task_size * pool_cfg->sl_call_pool_size_qwords * SWITCHLESS_BITS_IN_QWORD;
+    return sizeof(cc_sl_config_t) + signal_bit_buf_size + task_buf_size;
+ }
+ 
+ /*
+diff --git a/inc/host_inc/secgear_defs.h b/inc/host_inc/secgear_defs.h
+index 4ca11a8..183646d 100644
+--- a/inc/host_inc/secgear_defs.h
+++ b/inc/host_inc/secgear_defs.h
+@@ -32,6 +32,7 @@ extern "C" {
+ #define CC_MUTEX_UNLOCK(lock)           CC_IGNORE(pthread_mutex_unlock(lock))
+ #define CC_COND_INIT(cond, attr)        CC_IGNORE(pthread_cond_init(cond, attr))
+ #define CC_COND_SIGNAL(cond)            CC_IGNORE(pthread_cond_signal(cond))
+#define CC_COND_BROADCAST(cond)         CC_IGNORE(pthread_cond_broadcast(cond))
+ #define CC_COND_WAIT(cond, mtx_lock)    CC_IGNORE(pthread_cond_wait(cond, mtx_lock))
+ #define CC_COND_DESTROY(cond)           CC_IGNORE(pthread_cond_destroy(cond))
+ #define CC_THREAD_ATTR_INIT(attr)       CC_IGNORE(pthread_attr_init(attr))
+diff --git a/inc/host_inc/secgear_uswitchless.h b/inc/host_inc/secgear_uswitchless.h
+index bff563c..8e21fd9 100644
+--- a/inc/host_inc/secgear_uswitchless.h
+++ b/inc/host_inc/secgear_uswitchless.h
+@@ -47,6 +47,13 @@
+ extern "C" {
+ #endif
+ 
+typedef enum {
+    /* Worker threads work all the time. */
+    WORKERS_POLICY_BUSY,
+    /* The worker thread is only woken up when the task arrives and goes to sleep after the task is processed. */
+    WORKERS_POLICY_WAKEUP,
+    WORKERS_POLICY_MAX
+} cc_workers_policy_t;
+ 
+ typedef struct {
+     /* number of untrusted (for ocalls) worker threads */
+@@ -72,9 +79,12 @@ typedef struct {
+      * before going to sleep, only for SGX
+      */
+     uint32_t retries_before_sleep;
+
+    /* Worker thread scheduling policy, refer to cc_workers_policy_t, only for GP */
+    uint64_t workers_policy;
+ } cc_sl_config_t;
+ 
+-#define CC_USWITCHLESS_CONFIG_INITIALIZER   {1, 1, 1, 16, 0, 0}
+#define CC_USWITCHLESS_CONFIG_INITIALIZER   {1, 1, 1, 16, 0, 0, WORKERS_POLICY_BUSY}
+ 
+ #ifdef __cplusplus
+ }
+diff --git a/src/enclave_src/gp/itrustee/itrustee_tswitchless.c b/src/enclave_src/gp/itrustee/itrustee_tswitchless.c
+index 5ccb519..3955ff9 100644
+--- a/src/enclave_src/gp/itrustee/itrustee_tswitchless.c
+++ b/src/enclave_src/gp/itrustee/itrustee_tswitchless.c
+@@ -16,6 +16,7 @@
+ #include <stdarg.h>
+ #include <stdlib.h>
+ #include <time.h>
+#include <sys/time.h>
+ #include "secgear_defs.h"
+ #include "switchless_defs.h"
+ #include "bit_operation.h"
+@@ -55,6 +56,9 @@
+ #define TEESMP_THREAD_ATTR_TASK_ID TEESMP_THREAD_ATTR_TASK_ID_INHERIT
+ #endif
+ 
+static pthread_mutex_t g_sched_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t g_sched_cond = PTHREAD_COND_INITIALIZER;
+
+ static sl_task_pool_t *tswitchless_init_pool(void *pool_buf)
+ {
+     sl_task_pool_t *pool = (sl_task_pool_t *)calloc(1, sizeof(sl_task_pool_t));
+@@ -63,25 +67,39 @@ static sl_task_pool_t *tswitchless_init_pool(void *pool_buf)
+         return NULL;
+     }
+ 
+-    sl_task_pool_config_t *pool_cfg = (sl_task_pool_config_t *)pool_buf;
+    cc_sl_config_t *pool_cfg = (cc_sl_config_t *)pool_buf;
+ 
+     pool->pool_cfg = *pool_cfg;
+-    pool->bit_buf_size = pool_cfg->call_pool_size_qwords * sizeof(uint64_t);
+    pool->bit_buf_size = pool_cfg->sl_call_pool_size_qwords * sizeof(uint64_t);
+     pool->per_task_size = SL_CALCULATE_PER_TASK_SIZE(pool_cfg);
+ 
+     pool->pool_buf = (char *)pool_buf;
+-    pool->signal_bit_buf = (uint64_t *)(pool->pool_buf + sizeof(sl_task_pool_config_t));
+    pool->signal_bit_buf = (uint64_t *)(pool->pool_buf + sizeof(cc_sl_config_t));
+     pool->task_buf = (char *)pool->signal_bit_buf + pool->bit_buf_size;
+ 
+     return pool;
+ }
+ 
+static bool tswitchless_is_workers_policy_wakeup(cc_sl_config_t *cfg)
+{
+    return cfg->workers_policy == WORKERS_POLICY_WAKEUP;
+}
+
+ static void tswitchless_fini_workers(sl_task_pool_t *pool, pthread_t *tids)
+ {
+     int ret;
+     uint32_t thread_num = pool->pool_cfg.num_tworkers;
+     pool->need_stop_tworkers = true;
+ 
+    if (tswitchless_is_workers_policy_wakeup(&(pool->pool_cfg))) {
+        thread_num += 1;
+
+        // Wake all dormant worker threads and tell them to exit
+        CC_MUTEX_LOCK(&g_sched_lock);
+        CC_COND_BROADCAST(&g_sched_cond);
+        CC_MUTEX_UNLOCK(&g_sched_lock);
+    }
+
+     for (uint32_t i = 0; i < thread_num; ++i) {
+         if (tids[i] != NULL) {
+             ret = pthread_join(tids[i], NULL);
+@@ -99,7 +117,7 @@ static inline sl_task_t *tswitchless_get_task_by_index(sl_task_pool_t *pool, int
+ 
+ static int tswitchless_get_pending_task(sl_task_pool_t *pool)
+ {
+-    int call_pool_size_qwords = (int)pool->pool_cfg.call_pool_size_qwords;
+    int call_pool_size_qwords = (int)pool->pool_cfg.sl_call_pool_size_qwords;
+     uint64_t *signal_bit_buf = pool->signal_bit_buf;
+     int start_bit = 0;
+     int end_bit = 0;
+@@ -156,6 +174,10 @@ static void tswitchless_proc_task(sl_task_t *task)
+ 
+ static int thread_num = 0;
+ 
+#define TSWITCHLESS_TIMEOUT_IN_USEC 500000
+#define TSWITCHLESS_USEC_PER_SEC 1000000
+#define TSWITCHLESS_GETTIME_PER_CNT 10000000
+
+ static void *tswitchless_thread_routine(void *data)
+ {
+     int thread_index = __atomic_add_fetch(&thread_num, 1, __ATOMIC_ACQ_REL);
+@@ -165,14 +187,45 @@ static void *tswitchless_thread_routine(void *data)
+     sl_task_t *task_buf = NULL;
+     sl_task_pool_t *pool = (sl_task_pool_t *)data;
+     int processed_count = 0;
+    bool is_workers_policy_wakeup = tswitchless_is_workers_policy_wakeup(&(pool->pool_cfg));
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+    int count = 0;
+    bool timeout = true;
+ 
+     while (true) {
+         if (pool->need_stop_tworkers) {
+             break;
+         }
+ 
+        count++;
+         task_index = tswitchless_get_pending_task(pool);
+         if (task_index == -1) {
+            /*
+             * If the scheduling policy is WORKERS_POLICY_WAKEUP, keep polling for a period of time after
+             * the last task is processed before going to sleep, because a new task may arrive immediately.
+             * This reduces the performance loss caused by frequent sleep and wakeup between threads.
+             */
+            if (is_workers_policy_wakeup && count > TSWITCHLESS_GETTIME_PER_CNT) {
+                gettimeofday(&tval_after, NULL);
+                timersub(&tval_after, &tval_before, &duration);
+                timeout =
+                    (duration.tv_sec * TSWITCHLESS_USEC_PER_SEC + duration.tv_usec) >= TSWITCHLESS_TIMEOUT_IN_USEC;
+
+                count = 0;
+            }
+
+            if (is_workers_policy_wakeup && timeout) {
+                CC_MUTEX_LOCK(&g_sched_lock);
+                CC_COND_WAIT(&g_sched_cond, &g_sched_lock);
+                CC_MUTEX_UNLOCK(&g_sched_lock);
+
+                gettimeofday(&tval_before, NULL);
+                count = 0;
+                timeout = false;
+            }
+
+             continue;
+         }
+ 
+@@ -189,12 +242,76 @@ static void *tswitchless_thread_routine(void *data)
+     return NULL;
+ }
+ 
+static inline int tswitchless_get_total_pending_task(sl_task_pool_t *pool)
+{
+    int count = 0;
+    int call_pool_size_qwords = (int)pool->pool_cfg.sl_call_pool_size_qwords;
+    uint64_t *signal_bit_buf = pool->signal_bit_buf;
+    uint64_t element_val = 0;
+
+    for (int i = 0; i < call_pool_size_qwords; ++i) {
+        element_val = *(signal_bit_buf + i);
+
+        if (element_val == 0) {
+            continue;
+        }
+
+        count += count_ones(element_val);
+    }
+
+    return count;
+}
+
+static void *tswitchless_thread_scheduler(void *data)
+{
+    SLogTrace("Enter scheduler tworker.");
+
+    int task_num;
+    sl_task_pool_t *pool = (sl_task_pool_t *)data;
+
+    while (true) {
+        if (pool->need_stop_tworkers) {
+            break;
+        }
+
+        task_num = tswitchless_get_total_pending_task(pool);
+        if (task_num == 0) {
+            continue;
+        } else {
+            CC_MUTEX_LOCK(&g_sched_lock);
+            CC_COND_BROADCAST(&g_sched_cond);
+            CC_MUTEX_UNLOCK(&g_sched_lock);
+        }
+    }
+
+    SLogTrace("Exit scheduler tworker.");
+
+    return NULL;
+}
+
+typedef void *(*TSWITCHLESS_THREAD_FUNC)(void *data);
+
+/* Selects the thread entry point: the scheduler when is_sched is true, otherwise the task worker. */
+static TSWITCHLESS_THREAD_FUNC tswitchless_get_thread_func(bool is_sched)
+{
+    if (is_sched) {
+        return tswitchless_thread_scheduler;
+    }
+    return tswitchless_thread_routine;
+}
+
+ static pthread_t *tswitchless_init_workers(sl_task_pool_t *pool)
+ {
+     int ret;
+-    sl_task_pool_config_t *pool_cfg = &pool->pool_cfg;
+    cc_sl_config_t *pool_cfg = &pool->pool_cfg;
+    uint32_t thread_count = pool_cfg->num_tworkers;
+    bool is_workers_policy_wakeup = tswitchless_is_workers_policy_wakeup(pool_cfg);
+
+    if (is_workers_policy_wakeup) {
+        thread_count += 1;
+    }
+ 
+-    pthread_t *tids = (pthread_t *)calloc(pool_cfg->num_tworkers, sizeof(pthread_t));
+    pthread_t *tids = (pthread_t *)calloc(thread_count, sizeof(pthread_t));
+     if (tids == NULL) {
+         SLogError("Malloc memory for tworkers failed.");
+         return NULL;
+@@ -214,8 +331,9 @@ static pthread_t *tswitchless_init_workers(sl_task_pool_t *pool)
+         return NULL;
+     }
+ 
+-    for (uint32_t i = 0; i < pool_cfg->num_tworkers; ++i) {
+-        ret = pthread_create(tids + i, &attr, tswitchless_thread_routine, pool);
+    for (uint32_t i = 0; i < thread_count; ++i) {
+        // If the policy is WORKERS_POLICY_WAKEUP, the first is the scheduling thread.
+        ret = pthread_create(tids + i, &attr, tswitchless_get_thread_func(is_workers_policy_wakeup && i == 0), pool);
+         if (ret != 0) {
+             tswitchless_fini_workers(pool, tids);
+             free(tids);
+diff --git a/src/host_src/gp/gp_enclave.c b/src/host_src/gp/gp_enclave.c
+index 27952e7..f77cdd8 100644
+--- a/src/host_src/gp/gp_enclave.c
+++ b/src/host_src/gp/gp_enclave.c
+@@ -10,6 +10,8 @@
+  * See the Mulan PSL v2 for more details.
+  */
+ 
+#include "gp_enclave.h"
+
+ #include <stdint.h>
+ #include <malloc.h>
+ #include <string.h>
+@@ -20,10 +22,8 @@
+ #include <tee_client_type.h>
+ 
+ #include "secgear_defs.h"
+-#include "enclave.h"
+-#include "enclave_internal.h"
+ #include "enclave_log.h"
+-#include "gp_enclave.h"
+#include "secgear_uswitchless.h"
+ #include "register_agent.h"
+ #include "gp_uswitchless.h"
+ #include "gp_shared_memory_defs.h"
+@@ -370,13 +370,13 @@ cc_enclave_result_t init_uswitchless(cc_enclave_t *enclave, const enclave_featur
+         return CC_ERROR_SWITCHLESS_REINIT;
+     }
+ 
+-    sl_task_pool_config_t *cfg = (sl_task_pool_config_t *)feature->feature_desc;
+-    if (!uswitchless_is_valid_config(cfg)) {
+    cc_sl_config_t cfg = *((cc_sl_config_t *)feature->feature_desc);
+    if (!uswitchless_is_valid_config(&cfg)) {
+         return CC_ERROR_BAD_PARAMETERS;
+     }
+-    uswitchless_adjust_config(cfg);
+    uswitchless_adjust_config(&cfg);
+ 
+-    size_t pool_buf_len = sl_get_pool_buf_len_by_config(cfg);
+    size_t pool_buf_len = sl_get_pool_buf_len_by_config(&cfg);
+     void *pool_buf = gp_malloc_shared_memory(enclave, pool_buf_len, true);
+     if (pool_buf == NULL) {
+         return CC_ERROR_OUT_OF_MEMORY;
+@@ -384,10 +384,10 @@ cc_enclave_result_t init_uswitchless(cc_enclave_t *enclave, const enclave_featur
+     (void)memset(pool_buf, 0, pool_buf_len);
+ 
+     // Fill config
+-    (void)memcpy(pool_buf, cfg, sizeof(sl_task_pool_config_t));
+    (void)memcpy(pool_buf, &cfg, sizeof(cc_sl_config_t));
+ 
+     // Layout task pool
+-    sl_task_pool_t *pool = uswitchless_create_task_pool(pool_buf, cfg);
+    sl_task_pool_t *pool = uswitchless_create_task_pool(pool_buf, &cfg);
+     if (pool == NULL) {
+         (void)gp_free_shared_memory(enclave, pool_buf);
+         return CC_ERROR_OUT_OF_MEMORY;
+diff --git a/src/host_src/gp/gp_enclave.h b/src/host_src/gp/gp_enclave.h
+index e788ed3..2fdfc99 100644
+--- a/src/host_src/gp/gp_enclave.h
+++ b/src/host_src/gp/gp_enclave.h
+@@ -15,6 +15,8 @@
+ 
+ #include "tee_client_api.h"
+ #include "switchless_defs.h"
+#include "enclave.h"
+#include "enclave_internal.h"
+ 
+ enum
+ {
+diff --git a/src/host_src/gp/gp_uswitchless.c b/src/host_src/gp/gp_uswitchless.c
+index 8db756e..f1288c2 100644
+--- a/src/host_src/gp/gp_uswitchless.c
+++ b/src/host_src/gp/gp_uswitchless.c
+@@ -34,19 +34,20 @@
+ #define SWITCHLESS_DEFAULT_TWORKERS 8
+ #define SWITCHLESS_DEFAULT_POOL_SIZE_QWORDS 1
+ 
+-bool uswitchless_is_valid_config(sl_task_pool_config_t *cfg)
+bool uswitchless_is_valid_config(cc_sl_config_t *cfg)
+ {
+     if ((cfg->num_uworkers > SWITCHLESS_MAX_UWORKERS) ||
+         (cfg->num_tworkers > SWITCHLESS_MAX_TWORKERS) ||
+         (cfg->num_max_params > SWITCHLESS_MAX_PARAMETER_NUM) ||
+-        (cfg->call_pool_size_qwords > SWITCHLESS_MAX_POOL_SIZE_QWORDS)) {
+        (cfg->sl_call_pool_size_qwords > SWITCHLESS_MAX_POOL_SIZE_QWORDS) ||
+        (cfg->workers_policy >= WORKERS_POLICY_MAX)) {
+         return false;
+     }
+ 
+     return true;
+ }
+ 
+-void uswitchless_adjust_config(sl_task_pool_config_t *cfg)
+void uswitchless_adjust_config(cc_sl_config_t *cfg)
+ {
+     if (cfg->num_uworkers == 0) {
+         cfg->num_uworkers = SWITCHLESS_DEFAULT_UWORKERS;
+@@ -56,14 +57,14 @@ void uswitchless_adjust_config(sl_task_pool_config_t *cfg)
+         cfg->num_tworkers = SWITCHLESS_DEFAULT_TWORKERS;
+     }
+ 
+-    if (cfg->call_pool_size_qwords == 0) {
+-        cfg->call_pool_size_qwords = SWITCHLESS_DEFAULT_POOL_SIZE_QWORDS;
+    if (cfg->sl_call_pool_size_qwords == 0) {
+        cfg->sl_call_pool_size_qwords = SWITCHLESS_DEFAULT_POOL_SIZE_QWORDS;
+     }
+ }
+ 
+-sl_task_pool_t *uswitchless_create_task_pool(void *pool_buf, sl_task_pool_config_t *pool_cfg)
+sl_task_pool_t *uswitchless_create_task_pool(void *pool_buf, cc_sl_config_t *pool_cfg)
+ {
+-    size_t bit_buf_size = pool_cfg->call_pool_size_qwords * sizeof(uint64_t);
+    size_t bit_buf_size = pool_cfg->sl_call_pool_size_qwords * sizeof(uint64_t);
+     sl_task_pool_t *pool = (sl_task_pool_t *)calloc(sizeof(sl_task_pool_t) + bit_buf_size, sizeof(char));
+     if (pool == NULL) {
+         return NULL;
+@@ -76,7 +77,7 @@ sl_task_pool_t *uswitchless_create_task_pool(void *pool_buf, sl_task_pool_config
+     pool->pool_buf = (char *)pool_buf;
+     pool->free_bit_buf = (uint64_t *)((char *)pool + sizeof(sl_task_pool_t));
+     (void)memset(pool->free_bit_buf, 0xFF, bit_buf_size);
+-    pool->signal_bit_buf = (uint64_t *)(pool->pool_buf + sizeof(sl_task_pool_config_t));
+    pool->signal_bit_buf = (uint64_t *)(pool->pool_buf + sizeof(cc_sl_config_t));
+     pool->task_buf = (char *)pool->signal_bit_buf + pool->bit_buf_size;
+ 
+     return pool;
+@@ -99,7 +100,7 @@ bool uswitchless_is_valid_param_num(cc_enclave_t *enclave, uint32_t argc)
+ int uswitchless_get_idle_task_index(cc_enclave_t *enclave)
+ {
+     sl_task_pool_t *pool = USWITCHLESS_TASK_POOL(enclave);
+-    int call_pool_size_qwords = (int)pool->pool_cfg.call_pool_size_qwords;
+    int call_pool_size_qwords = (int)pool->pool_cfg.sl_call_pool_size_qwords;
+     uint64_t *free_bit_buf = pool->free_bit_buf;
+     int start_bit = 0;
+     int end_bit = 0;
+diff --git a/src/host_src/gp/gp_uswitchless.h b/src/host_src/gp/gp_uswitchless.h
+index e435ad1..4d957ce 100644
+--- a/src/host_src/gp/gp_uswitchless.h
+++ b/src/host_src/gp/gp_uswitchless.h
+@@ -19,6 +19,7 @@
+ #include <stdbool.h>
+ #include "enclave.h"
+ #include "switchless_defs.h"
+#include "secgear_uswitchless.h"
+ 
+ #ifdef __cplusplus
+ extern "C" {
+@@ -32,7 +33,7 @@ extern "C" {
+  *      true: valid
+  *      false: invalid
+  */
+-bool uswitchless_is_valid_config(sl_task_pool_config_t *cfg);
+bool uswitchless_is_valid_config(cc_sl_config_t *cfg);
+ 
+ /*
+  * Summary: Adjusting default configurations
+@@ -40,7 +41,7 @@ bool uswitchless_is_valid_config(sl_task_pool_config_t *cfg);
+  *      cfg: configuration information of the task pool
+  * Return: NA
+  */
+-void uswitchless_adjust_config(sl_task_pool_config_t *cfg);
+void uswitchless_adjust_config(cc_sl_config_t *cfg);
+ 
+ /*
+  * Summary: initializing the switchless invoking task pool
+@@ -49,7 +50,7 @@ void uswitchless_adjust_config(sl_task_pool_config_t *cfg);
+  *      pool_cfg: configuration information of the task pool
+  * Return: NA
+  */
+-sl_task_pool_t *uswitchless_create_task_pool(void *pool_buf, sl_task_pool_config_t *pool_cfg);
+sl_task_pool_t *uswitchless_create_task_pool(void *pool_buf, cc_sl_config_t *pool_cfg);
+ 
+ /*
+  * Summary: obtains the index of an idle task area from specified enclave
+-- 
+2.27.0
+
--- a/0051-asynchronous-switchless.patch
+++ b/0051-asynchronous-switchless.patch
@ -0,0 +1,515 @@
+From f73e925c5bd78dff9c6398f62386c86d1e7aaf01 Mon Sep 17 00:00:00 2001
+From: modric <wangyu283@huawei.com>
+Date: Wed, 9 Nov 2022 15:14:08 +0800
+Subject: [PATCH 2/4] asynchronous switchless
+
+---
+ inc/common_inc/switchless_defs.h |  3 +-
+ inc/host_inc/enclave.h           | 13 ++++++++
+ inc/host_inc/enclave_internal.h  | 12 ++++---
+ inc/host_inc/status.h            |  4 ++-
+ src/host_src/enclave.c           | 18 ++++++++++
+ src/host_src/gp/gp_enclave.c     | 57 ++++++++++++++++++++++++++++----
+ src/host_src/gp/gp_uswitchless.c | 44 +++++++++++++++++++++---
+ src/host_src/gp/gp_uswitchless.h | 35 ++++++++++++++++----
+ tools/codegener/Genheader.ml     | 29 +++++++++++++++-
+ tools/codegener/Genuntrust.ml    | 54 ++++++++++++++++++++++++++++--
+ 10 files changed, 242 insertions(+), 27 deletions(-)
+
+diff --git a/inc/common_inc/switchless_defs.h b/inc/common_inc/switchless_defs.h
+index 84629c3..b525df0 100644
+--- a/inc/common_inc/switchless_defs.h
+++ b/inc/common_inc/switchless_defs.h
+@@ -59,7 +59,8 @@ typedef struct {
+ 
+ typedef struct {
+     volatile uint32_t status;
+-    uint32_t func_id;
+    uint16_t func_id;
+    uint16_t retval_size;
+     volatile uint64_t ret_val;
+     uint64_t params[0];
+ } sl_task_t;
+diff --git a/inc/host_inc/enclave.h b/inc/host_inc/enclave.h
+index 0dde8c3..94aedf4 100644
+--- a/inc/host_inc/enclave.h
+++ b/inc/host_inc/enclave.h
+@@ -87,6 +87,19 @@ CC_API_SPEC cc_enclave_result_t cc_enclave_create(
+ 
+ CC_API_SPEC cc_enclave_result_t cc_enclave_destroy(cc_enclave_t *context);
+ 
+/*
+ * Summary: Obtains the result of the switchless asynchronous invoking task
+ * Parameters:
+ *     enclave: enclave
+ *     task_id: id of an asynchronous invoking task
+ *     retval: receives the return value; pass NULL for functions of the void type or when the return value is ignored
+ * Return:
+ *     CC_SUCCESS, success;
+ *     CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED, the asynchronous invoking task is not completed;
+ *     others failed.
+ */
+CC_API_SPEC cc_enclave_result_t cc_sl_get_async_result(cc_enclave_t *enclave, int task_id, void *retval);
+
+ /*automatic file generation required: aligned bytes*/
+ #define ALIGNMENT_SIZE (2 * sizeof(void*))
+ 
+diff --git a/inc/host_inc/enclave_internal.h b/inc/host_inc/enclave_internal.h
+index 5a8af38..a66d1a3 100644
+--- a/inc/host_inc/enclave_internal.h
+++ b/inc/host_inc/enclave_internal.h
+@@ -31,7 +31,8 @@ typedef enum _enclave_state {
+ } enclave_state_t;
+ 
+ typedef struct {
+-    uint32_t func_id;
+    uint16_t func_id;
+    uint16_t retval_size;
+     uint32_t argc;
+     void *args;
+ } sl_ecall_func_info_t;
+@@ -66,10 +67,11 @@ struct cc_enclave_ops {
+ 		    const void *ocall_table);
+ 
+     /* switchless ecall */
+-    cc_enclave_result_t (*cc_sl_ecall_enclave)(cc_enclave_t *enclave,
+-                                               void *retval,
+-                                               size_t retval_size,
+-                                               sl_ecall_func_info_t *func_info);
+    cc_enclave_result_t (*cc_sl_ecall_enclave)(cc_enclave_t *enclave, void *retval, sl_ecall_func_info_t *func_info);
+
+    /* switchless async ecall */
+    cc_enclave_result_t (*cc_sl_async_ecall)(cc_enclave_t *enclave, int *task_id, sl_ecall_func_info_t *func_info);
+    cc_enclave_result_t (*cc_sl_async_ecall_get_result)(cc_enclave_t *enclave, int task_id, void *retval);
+ 
+     /* shared memory */
+     void *(*cc_malloc_shared_memory)(cc_enclave_t *enclave, size_t size, bool is_control_buf);
+diff --git a/inc/host_inc/status.h b/inc/host_inc/status.h
+index 15de9d1..4f982f8 100644
+--- a/inc/host_inc/status.h
+++ b/inc/host_inc/status.h
+@@ -165,7 +165,9 @@ typedef enum _enclave_result_t
+     CC_ERROR_SHARED_MEMORY_REPEAT_REGISTER, 	        /* The shared memory is repeatedly registered */
+     CC_ERROR_SHARED_MEMORY_START_ADDR_INVALID, 	        /* Invalid start address of the shared memory */
+     CC_ERROR_SHARED_MEMORY_NOT_REGISTERED, 	            /* Unregistered shared memory */
+-    CC_ERROR_ADDRESS_UNACCESSABLE, 	            /* Memory address is not within enclave */
+    CC_ERROR_ADDRESS_UNACCESSABLE,                      /* Memory address is not within enclave */
+    CC_ERROR_SWITCHLESS_INVALID_TASK_ID,                /* Invalid invoking task ID */
+    CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED,          /* The asynchronous invoking task is not completed */
+     CC_MAXIMUM_ERROR,
+ } cc_enclave_result_t;
+ 
+diff --git a/src/host_src/enclave.c b/src/host_src/enclave.c
+index 2e6a28a..d8b7d35 100644
+--- a/src/host_src/enclave.c
+++ b/src/host_src/enclave.c
+@@ -20,6 +20,7 @@
+ #include "enclave.h"
+ #include "enclave_log.h"
+ #include "enclave_internal.h"
+#include "secgear_defs.h"
+ 
+ extern list_ops_management  g_list_ops;
+ 
+@@ -313,3 +314,20 @@ cc_enclave_result_t cc_enclave_destroy(cc_enclave_t *context)
+ 
+     return CC_SUCCESS;
+ }
+
+cc_enclave_result_t cc_sl_get_async_result(cc_enclave_t *enclave, int task_id, void *retval)
+{
+    cc_enclave_result_t ret;
+
+    if (enclave == NULL || task_id < 0 || !enclave->used_flag) {
+        return CC_ERROR_BAD_PARAMETERS;
+    }
+
+    CC_RWLOCK_LOCK_RD(&enclave->rwlock);
+
+    ret = enclave->list_ops_node->ops_desc->ops->cc_sl_async_ecall_get_result(enclave, task_id, retval);
+
+    CC_RWLOCK_UNLOCK(&enclave->rwlock);
+
+    return ret;
+}
+\ No newline at end of file
+diff --git a/src/host_src/gp/gp_enclave.c b/src/host_src/gp/gp_enclave.c
+index f77cdd8..5345973 100644
+--- a/src/host_src/gp/gp_enclave.c
+++ b/src/host_src/gp/gp_enclave.c
+@@ -817,10 +817,7 @@ done:
+     return result;
+ }
+ 
+-cc_enclave_result_t cc_sl_enclave_call_function(cc_enclave_t *enclave,
+-                                                void *retval,
+-                                                size_t retval_size,
+-                                                sl_ecall_func_info_t *func_info)
+cc_enclave_result_t cc_sl_enclave_call_function(cc_enclave_t *enclave, void *retval, sl_ecall_func_info_t *func_info)
+ {
+     if (!uswitchless_is_switchless_enabled(enclave)) {
+         return CC_ERROR_SWITCHLESS_DISABLED;
+@@ -835,19 +832,67 @@ cc_enclave_result_t cc_sl_enclave_call_function(cc_enclave_t *enclave,
+         return CC_ERROR_SWITCHLESS_TASK_POOL_FULL;
+     }
+ 
+-    uswitchless_fill_task(enclave, task_index, func_info->func_id, func_info->argc, func_info->args);
+    uswitchless_fill_task(enclave, task_index, func_info->func_id, func_info->retval_size, func_info->argc,
+        func_info->args);
+     uswitchless_submit_task(enclave, task_index);
+-    cc_enclave_result_t ret = uswitchless_get_task_result(enclave, task_index, retval, retval_size);
+    cc_enclave_result_t ret = uswitchless_get_task_result(enclave, task_index, retval);
+     uswitchless_put_idle_task_by_index(enclave, task_index);
+ 
+     return ret;
+ }
+ 
+cc_enclave_result_t cc_sl_async_ecall(cc_enclave_t *enclave, int *task_id, sl_ecall_func_info_t *func_info)
+{
+    if (task_id == NULL) {
+        return CC_ERROR_BAD_PARAMETERS;
+    }
+
+    if (!uswitchless_is_switchless_enabled(enclave)) {
+        return CC_ERROR_SWITCHLESS_DISABLED;
+    }
+
+    if (!uswitchless_is_valid_param_num(enclave, func_info->argc)) {
+        return CC_ERROR_SWITCHLESS_INVALID_ARG_NUM;
+    }
+
+    int task_index = uswitchless_get_idle_task_index(enclave);
+    if (task_index < 0) {
+        return CC_ERROR_SWITCHLESS_TASK_POOL_FULL;
+    }
+
+    uswitchless_fill_task(enclave, task_index, func_info->func_id, func_info->retval_size, func_info->argc,
+        func_info->args);
+    uswitchless_submit_task(enclave, task_index);
+    *task_id = task_index;
+
+    return CC_SUCCESS;
+}
+
+cc_enclave_result_t cc_sl_async_ecall_check_result(cc_enclave_t *enclave, int task_id, void *retval)
+{
+    if (!uswitchless_is_switchless_enabled(enclave)) {
+        return CC_ERROR_SWITCHLESS_DISABLED;
+    }
+
+    if (!uswitchless_is_valid_task_index(enclave, task_id)) {
+        return CC_ERROR_SWITCHLESS_INVALID_TASK_ID;
+    }
+
+    cc_enclave_result_t ret = uswitchless_get_async_task_result(enclave, task_id, retval);
+    if (ret != CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+        uswitchless_put_idle_task_by_index(enclave, task_id);
+    }
+
+    return ret;
+}
+
+ const struct cc_enclave_ops g_ops = {
+     .cc_create_enclave  = _gp_create,
+     .cc_destroy_enclave = _gp_destroy,
+     .cc_ecall_enclave =  cc_enclave_call_function,
+     .cc_sl_ecall_enclave = cc_sl_enclave_call_function,
+    .cc_sl_async_ecall = cc_sl_async_ecall,
+    .cc_sl_async_ecall_get_result = cc_sl_async_ecall_check_result,
+     .cc_malloc_shared_memory = gp_malloc_shared_memory,
+     .cc_free_shared_memory = gp_free_shared_memory,
+     .cc_register_shared_memory = gp_register_shared_memory,
+diff --git a/src/host_src/gp/gp_uswitchless.c b/src/host_src/gp/gp_uswitchless.c
+index f1288c2..2a315ea 100644
+--- a/src/host_src/gp/gp_uswitchless.c
+++ b/src/host_src/gp/gp_uswitchless.c
+@@ -97,6 +97,21 @@ bool uswitchless_is_valid_param_num(cc_enclave_t *enclave, uint32_t argc)
+     return argc <= USWITCHLESS_TASK_POOL(enclave)->pool_cfg.num_max_params;
+ }
+ 
+/* Returns true when task_index is within the pool and its free bit is clear, i.e. the slot is not idle. */
+bool uswitchless_is_valid_task_index(cc_enclave_t *enclave, int task_index)
+{
+    sl_task_pool_t *pool = USWITCHLESS_TASK_POOL(enclave);
+    int task_total = pool->pool_cfg.sl_call_pool_size_qwords * SWITCHLESS_BITS_IN_QWORD;
+
+    if (task_index < 0 || task_index >= task_total) {
+        return false;
+    }
+
+    // Build the mask in 64 bits: 1UL is only 32 bits wide on ILP32 targets, where a shift of 32..63 is undefined.
+    uint64_t mask = (uint64_t)1 << (task_index % SWITCHLESS_BITS_IN_QWORD);
+    return (pool->free_bit_buf[task_index / SWITCHLESS_BITS_IN_QWORD] & mask) == 0;
+}
+
+ int uswitchless_get_idle_task_index(cc_enclave_t *enclave)
+ {
+     sl_task_pool_t *pool = USWITCHLESS_TASK_POOL(enclave);
+@@ -144,11 +159,13 @@ static inline sl_task_t *uswitchless_get_task_by_index(cc_enclave_t *enclave, in
+     return (sl_task_t *)(pool->task_buf + task_index * pool->per_task_size);
+ }
+ 
+-void uswitchless_fill_task(cc_enclave_t *enclave, int task_index, uint32_t func_id, uint32_t argc, const void *args)
+void uswitchless_fill_task(cc_enclave_t *enclave, int task_index, uint16_t func_id, uint16_t retval_size,
+    uint32_t argc, const void *args)
+ {
+     sl_task_t *task = uswitchless_get_task_by_index(enclave, task_index);
+ 
+     task->func_id = func_id;
+    task->retval_size = retval_size;
+     __atomic_store_n(&task->status, SL_TASK_INIT, __ATOMIC_RELEASE);
+     memcpy(&task->params[0], args, sizeof(uint64_t) * argc);
+ }
+@@ -165,7 +182,7 @@ void uswitchless_submit_task(cc_enclave_t *enclave, int task_index)
+ 
+ #define CA_TIMEOUT_IN_SEC 60
+ #define CA_GETTIME_PER_CNT 100000000
+-cc_enclave_result_t uswitchless_get_task_result(cc_enclave_t *enclave, int task_index, void *retval, size_t retval_size)
+cc_enclave_result_t uswitchless_get_task_result(cc_enclave_t *enclave, int task_index, void *retval)
+ {
+     sl_task_t *task = uswitchless_get_task_by_index(enclave, task_index);
+     uint32_t cur_status;
+@@ -178,8 +195,8 @@ cc_enclave_result_t uswitchless_get_task_result(cc_enclave_t *enclave, int task_
+     while (true) {
+         cur_status = __atomic_load_n(&task->status, __ATOMIC_ACQUIRE);
+         if (cur_status == SL_TASK_DONE_SUCCESS) {
+-            if ((retval != NULL) && (retval_size != 0)) {
+-                (void)memcpy(retval, (void *)&task->ret_val, retval_size);
+            if ((retval != NULL) && (task->retval_size > 0)) {
+                (void)memcpy(retval, (void *)&task->ret_val, task->retval_size);
+             }
+ 
+             return CC_SUCCESS;
+@@ -199,3 +216,22 @@ cc_enclave_result_t uswitchless_get_task_result(cc_enclave_t *enclave, int task_
+ 
+     return CC_ERROR_TIMEOUT;
+ }
+
+cc_enclave_result_t uswitchless_get_async_task_result(cc_enclave_t *enclave, int task_index, void *retval)
+{
+    sl_task_t *task = uswitchless_get_task_by_index(enclave, task_index);
+    uint32_t cur_status;
+
+    cur_status = __atomic_load_n(&task->status, __ATOMIC_ACQUIRE);
+    if (cur_status == SL_TASK_DONE_SUCCESS) {
+        if ((retval != NULL) && (task->retval_size > 0)) {
+            (void)memcpy(retval, (void *)&task->ret_val, task->retval_size);
+        }
+
+        return CC_SUCCESS;
+    } else if (cur_status == SL_TASK_DONE_FAILED) {
+        return (cc_enclave_result_t)task->ret_val;
+    }
+
+    return CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED;
+}
+diff --git a/src/host_src/gp/gp_uswitchless.h b/src/host_src/gp/gp_uswitchless.h
+index 4d957ce..13ac14a 100644
+--- a/src/host_src/gp/gp_uswitchless.h
+++ b/src/host_src/gp/gp_uswitchless.h
+@@ -81,18 +81,26 @@ void uswitchless_put_idle_task_by_index(cc_enclave_t *enclave, int task_index);
+ void uswitchless_submit_task(cc_enclave_t *enclave, int task_index);
+ 
+ /*
+- * Summary: submitting a task
+ * Summary: Obtains the result of the switchless invoking task
+  * Parameters:
+  *      enclave: enclave
+  *      task_index: index of an task area
+  *      ret_val: address that accepts the return value
+- *      ret_val_size: size of the return value
+  * Return: CC_SUCCESS, success; others failed.
+  */
+-cc_enclave_result_t uswitchless_get_task_result(cc_enclave_t *enclave,
+-                                                int task_index,
+-                                                void *ret_val,
+-                                                size_t ret_val_size);
+cc_enclave_result_t uswitchless_get_task_result(cc_enclave_t *enclave, int task_index, void *ret_val);
+
+/*
+ * Summary: Obtains the result of a switchless asynchronous invoking task
+ * Parameters:
+ *      enclave: enclave
+ *      task_index: index of a task area
+ *      retval: address that accepts the return value
+ * Return: CC_SUCCESS, success;
+ *         CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED, the asynchronous invoking task is not completed;
+ *         others, failed.
+ */
+cc_enclave_result_t uswitchless_get_async_task_result(cc_enclave_t *enclave, int task_index, void *retval);
+ 
+ /*
+  * Summary: whether the switchless features is enabled
+@@ -115,17 +123,30 @@ bool uswitchless_is_switchless_enabled(cc_enclave_t *enclave);
+  */
+ bool uswitchless_is_valid_param_num(cc_enclave_t *enclave, uint32_t argc);
+ 
+/*
+ * Summary: whether the task index is valid
+ * Parameters:
+ *      enclave: enclave
+ *      task_index: index of the task to check
+ * Return:
+ *      true: the task index is valid
+ *      false: invalid task index
+ */
+bool uswitchless_is_valid_task_index(cc_enclave_t *enclave, int task_index);
+
+ /*
+  * Summary: fill a task
+  * Parameters:
+  *      enclave: enclave
+  *      task_index: index of an task area
+  *      func_id: switchless function index
+ *      retval_size: size of the return value of the function
+  *      argc: number of parameters
+  *      args: parameter buffer
+  * Return: NA
+  */
+-void uswitchless_fill_task(cc_enclave_t *enclave, int task_index, uint32_t func_id, uint32_t argc, const void *args);
+void uswitchless_fill_task(cc_enclave_t *enclave, int task_index, uint16_t func_id, uint16_t retval_size,
+    uint32_t argc, const void *args);
+ 
+ #ifdef __cplusplus
+ }
+diff --git a/tools/codegener/Genheader.ml b/tools/codegener/Genheader.ml
+index 9d0514c..e00157d 100644
+--- a/tools/codegener/Genheader.ml
+++ b/tools/codegener/Genheader.ml
+@@ -71,6 +71,27 @@ let generate_rproxy_prototype (fd: func_decl) =
+         "cc_enclave_result_t " ^ func_name ^ enclave_decl ^ func_args ^")";
+     ]
+ 
+let generate_rproxy_prototype_sl_async (tf: trusted_func) =
+  if not tf.tf_is_switchless then
+    [""]
+  else
+    let fd = tf.tf_fdecl in
+    let func_name = fd.fname ^ "_async" in
+    let enclave_decl = "(\n    cc_enclave_t *enclave,\n    int *task_id" in
+    let func_args =
+      let func_args_list =
+          List.map (fun f -> gen_parm_str f) fd.plist
+    in
+    if List.length fd.plist > 0 then
+      let func_args_pre = String.concat ",\n    " func_args_list in
+        ",\n    " ^ func_args_pre
+    else
+      ""
+    in
+    [
+        "cc_enclave_result_t " ^ func_name ^ enclave_decl ^ func_args ^")";
+    ]
+
+ let generate_parm_str (p: pdecl) =
+     let (_, declr) = p in
+     declr.identifier
+@@ -344,14 +365,20 @@ let generate_untrusted_header (ec: enclave_content) =
+     let r_proxy_proto =
+         List.map (fun f -> generate_rproxy_prototype f.tf_fdecl) ec.tfunc_decls
+     in
+    let r_proxy_proto_sl_async =
+        List.map (fun f -> generate_rproxy_prototype_sl_async f) ec.tfunc_decls
+    in
+     let r_proxy =
+         String.concat ";\n\n" (List.flatten r_proxy_proto)
+     in
+    let r_proxy_sl_async =
+        String.concat ";\n\n" (List.flatten r_proxy_proto_sl_async)
+    in
+     [
+         hfile_start ^ hfile_include; 
+         c_start;
+         agent_id;
+-        trust_fproto_com ^ r_proxy ^ ";";
+        trust_fproto_com ^ r_proxy ^ ";\n\n" ^ r_proxy_sl_async ^ ";"; (* terminate r_proxy before the async prototypes *)
+         if (List.length ec.ufunc_decls <> 0) then untrust_fproto_com ^ untrust_func ^ ";"
+         else "/**** There is no untrusted function ****/";
+         c_end; 
+diff --git a/tools/codegener/Genuntrust.ml b/tools/codegener/Genuntrust.ml
+index dc3010d..6fb4967 100644
+--- a/tools/codegener/Genuntrust.ml
+++ b/tools/codegener/Genuntrust.ml
+@@ -98,6 +98,24 @@ let set_ecall_func_arguments (fd : func_decl) =
+         else "")
+     ]
+ 
+let set_sl_async_ecall_func_arguments (fd : func_decl) =
+    [
+        sprintf "cc_enclave_result_t %s(\n    %s" (fd.fname ^ "_async")  "cc_enclave_t *enclave,\n    int *task_id"
+        ^ (if fd.plist <> [] then
+            ",\n    " ^
+            concat ",\n    "
+            (List.map
+                (fun (ptype, decl) ->
+                    match ptype with
+                    PTVal ty -> (sprintf "%s %s" (get_tystr ty) decl.identifier)
+                    | PTPtr (t, a) -> match (a.pa_rdonly, is_array decl) with
+                                      | (true, false) -> sprintf "const %s %s" (get_tystr t) decl.identifier
+                                      | (false, true) -> sprintf "%s %s%s" (get_tystr t) decl.identifier (set_array_dims_str decl.array_dims)
+                                      | (_, _) -> sprintf "%s %s" (get_tystr t) decl.identifier)
+            fd.plist)
+        else "")
+    ]
+
+ let set_sl_ecall_func (tf : trusted_func) =
+     let tfd = tf.tf_fdecl in
+     let init_point = set_init_pointer tfd in
+@@ -162,20 +180,52 @@ let set_sl_ecall_func (tf : trusted_func) =
+         "    /* Call the cc_enclave function */";
+ 
+         "    sl_ecall_func_info_t func_info = {";
+-        "         .func_id = " ^ "fid_" ^ tfd.fname ^ ",";
+        "         .func_id = " ^ "fid_" ^ tfd.fname ^ ",\n         .retval_size= " ^ out_retval_size ^ ",";
+         "         .argc = " ^ num_params ^ ",";
+         "         .args = " ^ out_params ^ ",";
+         "    };";
+ 
+         "    ret = enclave->list_ops_node->ops_desc->ops->cc_sl_ecall_enclave(enclave,";
+         "                                                                     " ^ out_retval ^ ",";
+-        "                                                                     " ^ out_retval_size ^ ",";
+         "                                                                     &func_info);\n";
+ 
+         "    pthread_rwlock_unlock(&enclave->rwlock);";
+         (if tfd.plist <> [] then "    free(params_buf);" else "");
+         "    return ret;";
+         "}";
+
+        "";
+        concat ",\n    " (set_sl_async_ecall_func_arguments tfd) ^ ")";
+        "{";
+        "    cc_enclave_result_t ret;\n";
+        "    if (enclave == NULL) {";
+        "        return CC_ERROR_BAD_PARAMETERS;";
+        "    }\n";
+        "    if (pthread_rwlock_rdlock(&enclave->rwlock)) {";
+        "        return CC_ERROR_BUSY;";
+        "    }\n";
+        "    if (enclave->list_ops_node == NULL ||\n        enclave->list_ops_node->ops_desc == NULL ||";
+        "        enclave->list_ops_node->ops_desc->ops == NULL ||";
+        "        enclave->list_ops_node->ops_desc->ops->cc_sl_async_ecall == NULL) {";
+        "        pthread_rwlock_unlock(&enclave->rwlock);";
+        "        return CC_ERROR_BAD_PARAMETERS;";
+        "    }";
+        "";
+        params;
+        "    /* Call the cc_enclave function */";
+
+        "    sl_ecall_func_info_t func_info = {";
+        "         .func_id = " ^ "fid_" ^ tfd.fname ^ ",\n         .retval_size= " ^ out_retval_size ^ ",";
+        "         .argc = " ^ num_params ^ ",";
+        "         .args = " ^ out_params ^ ",";
+        "    };";
+
+        "    ret = enclave->list_ops_node->ops_desc->ops->cc_sl_async_ecall(enclave, task_id, &func_info);\n";
+
+        "    pthread_rwlock_unlock(&enclave->rwlock);";
+        (if tfd.plist <> [] then "    free(params_buf);" else "");
+        "    return ret;";
+        "}";
+     ]
+ 
+ let set_ecall_func (tf : trusted_func) =
+-- 
+2.27.0
+
--- a/0052-rollback-to-common-invoking-when-async-invoking-fail.patch
+++ b/0052-rollback-to-common-invoking-when-async-invoking-fail.patch
@ -0,0 +1,307 @@
+From 232ba565206caf01e7f514c0c5735a8e8d3ae06a Mon Sep 17 00:00:00 2001
+From: modric <wangyu283@huawei.com>
+Date: Wed, 9 Nov 2022 15:17:28 +0800
+Subject: [PATCH 3/4] rollback to common invoking when async invoking fails
+
+---
+ inc/host_inc/secgear_uswitchless.h |  7 ++-
+ inc/host_inc/status.h              |  1 +
+ src/host_src/gp/gp_enclave.c       |  5 ++
+ src/host_src/gp/gp_uswitchless.c   |  5 ++
+ src/host_src/gp/gp_uswitchless.h   | 10 ++++
+ tools/codegener/Genheader.ml       | 13 ++++-
+ tools/codegener/Gentrust.ml        | 10 ++--
+ tools/codegener/Genuntrust.ml      | 92 +++++++++++++++++++++++++++++-
+ 8 files changed, 133 insertions(+), 10 deletions(-)
+
+diff --git a/inc/host_inc/secgear_uswitchless.h b/inc/host_inc/secgear_uswitchless.h
+index 8e21fd9..2ea4691 100644
+--- a/inc/host_inc/secgear_uswitchless.h
+++ b/inc/host_inc/secgear_uswitchless.h
+@@ -81,10 +81,13 @@ typedef struct {
+     uint32_t retries_before_sleep;
+ 
+     /* Worker thread scheduling policy, refer to cc_workers_policy_t, only for GP */
+-    uint64_t workers_policy;
+    uint32_t workers_policy;
+
+    /* Indicates whether to roll back to common invoking when asynchronous switchless invoking fails, only for GP */
+    uint32_t rollback_to_common;
+ } cc_sl_config_t;
+ 
+-#define CC_USWITCHLESS_CONFIG_INITIALIZER   {1, 1, 1, 16, 0, 0, WORKERS_POLICY_BUSY}
+#define CC_USWITCHLESS_CONFIG_INITIALIZER   {1, 1, 1, 16, 0, 0, WORKERS_POLICY_BUSY, 0}
+ 
+ #ifdef __cplusplus
+ }
+diff --git a/inc/host_inc/status.h b/inc/host_inc/status.h
+index 4f982f8..7f8daaa 100644
+--- a/inc/host_inc/status.h
+++ b/inc/host_inc/status.h
+@@ -168,6 +168,7 @@ typedef enum _enclave_result_t
+     CC_ERROR_ADDRESS_UNACCESSABLE,                      /* Memory address is not within enclave */
+     CC_ERROR_SWITCHLESS_INVALID_TASK_ID,                /* Invalid invoking task ID */
+     CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED,          /* The asynchronous invoking task is not completed */
+    CC_ERROR_SWITCHLESS_ROLLBACK2COMMON,                /* rollback to common invoking when async invoking fails */
+     CC_MAXIMUM_ERROR,
+ } cc_enclave_result_t;
+ 
+diff --git a/src/host_src/gp/gp_enclave.c b/src/host_src/gp/gp_enclave.c
+index 5345973..521a850 100644
+--- a/src/host_src/gp/gp_enclave.c
+++ b/src/host_src/gp/gp_enclave.c
+@@ -857,6 +857,11 @@ cc_enclave_result_t cc_sl_async_ecall(cc_enclave_t *enclave, int *task_id, sl_ec
+ 
+     int task_index = uswitchless_get_idle_task_index(enclave);
+     if (task_index < 0) {
+        /* Task pool is full: roll back to common invoking if the enclave is configured to do so. */
+        if (uswitchless_need_rollback_to_common(enclave)) {
+            return CC_ERROR_SWITCHLESS_ROLLBACK2COMMON;
+        }
+
+         return CC_ERROR_SWITCHLESS_TASK_POOL_FULL;
+     }
+ 
+diff --git a/src/host_src/gp/gp_uswitchless.c b/src/host_src/gp/gp_uswitchless.c
+index 2a315ea..53ecc55 100644
+--- a/src/host_src/gp/gp_uswitchless.c
+++ b/src/host_src/gp/gp_uswitchless.c
+@@ -112,6 +112,11 @@ bool uswitchless_is_valid_task_index(cc_enclave_t *enclave, int task_index)
+     return !((*(pool->free_bit_buf + i)) & (1UL << j));
+ }
+ 
+bool uswitchless_need_rollback_to_common(cc_enclave_t *enclave)
+{
+    return USWITCHLESS_TASK_POOL(enclave)->pool_cfg.rollback_to_common > 0;
+}
+
+ int uswitchless_get_idle_task_index(cc_enclave_t *enclave)
+ {
+     sl_task_pool_t *pool = USWITCHLESS_TASK_POOL(enclave);
+diff --git a/src/host_src/gp/gp_uswitchless.h b/src/host_src/gp/gp_uswitchless.h
+index 13ac14a..a0ea117 100644
+--- a/src/host_src/gp/gp_uswitchless.h
+++ b/src/host_src/gp/gp_uswitchless.h
+@@ -134,6 +134,16 @@ bool uswitchless_is_valid_param_num(cc_enclave_t *enclave, uint32_t argc);
+  */
+ bool uswitchless_is_valid_task_index(cc_enclave_t *enclave, int task_index);
+ 
+/*
+ * Summary: whether to roll back to common invoking when asynchronous switchless invoking fails
+ * Parameters:
+ *      enclave: enclave
+ * Return:
+ *      true: yes
+ *      false: no
+ */
+bool uswitchless_need_rollback_to_common(cc_enclave_t *enclave);
+
+ /*
+  * Summary: fill a task
+  * Parameters:
+diff --git a/tools/codegener/Genheader.ml b/tools/codegener/Genheader.ml
+index e00157d..0f244f3 100644
+--- a/tools/codegener/Genheader.ml
+++ b/tools/codegener/Genheader.ml
+@@ -31,6 +31,14 @@ let generate_args_include (ufs: untrusted_func list) =
+     "#include \"enclave.h\"\n" ^
+     error_include ^ "\n"
+ 
+let generate_function_id_ex (tf: trusted_func) =
+    let f = tf.tf_fdecl in
+    let f_name = f.fname in
+    if tf.tf_is_switchless then
+        "fid_sl_async_" ^ f_name
+    else
+        "fid_" ^ f_name
+
+ let generate_function_id (f: func_decl) =
+     let f_name = f.fname in
+     "fid_" ^ f_name
+@@ -77,7 +85,8 @@ let generate_rproxy_prototype_sl_async (tf: trusted_func) =
+   else
+     let fd = tf.tf_fdecl in
+     let func_name = fd.fname ^ "_async" in
+-    let enclave_decl = "(\n    cc_enclave_t *enclave,\n    int *task_id" in
+    let enclave_decl =
+        "(\n    " ^  (match fd.rtype with Void -> "cc_enclave_t *enclave,\n    int *task_id" | _ -> "cc_enclave_t *enclave,\n    int *task_id,\n    " ^ (get_tystr fd.rtype ^ " *retval")) in
+     let func_args =
+       let func_args_list =
+           List.map (fun f -> gen_parm_str f) fd.plist
+@@ -270,7 +279,7 @@ let generate_args_header (ec: enclave_content) =
+     let trust_fid_body =
+         let trust_fid_pre =
+             List.mapi
+-        (fun i f -> sprintf "    %s = %d," (generate_function_id f.tf_fdecl) (i + 2)) tfunc_decls
+        (fun i f -> sprintf "    %s = %d," (generate_function_id_ex f) (i + 2)) ec.tfunc_decls
+     in
+     String.concat "\n" trust_fid_pre
+         in
+diff --git a/tools/codegener/Gentrust.ml b/tools/codegener/Gentrust.ml
+index 6b6fa00..d950899 100644
+--- a/tools/codegener/Gentrust.ml
+++ b/tools/codegener/Gentrust.ml
+@@ -146,6 +146,7 @@ let set_switchless_ecall_func (tf : trusted_func) =
+         match tfd.rtype with
+             | Void -> ""
+             | _ -> "    (void)memcpy(retval, &ret, sizeof(ret));" in
+    if tf.tf_is_switchless then
+     [
+         sprintf "\nvoid sl_ecall_%s(void *task_buf)" tfd.fname;
+         "{";
+@@ -160,15 +161,15 @@ let set_switchless_ecall_func (tf : trusted_func) =
+         write_back_retval;
+         "}";
+     ]
+    else ["";]
+ 
+ let set_ecall_func (tf : trusted_func) =
+-    if tf.tf_is_switchless then
+-        set_switchless_ecall_func tf
+-    else
+    let slfunc = String.concat " " (set_switchless_ecall_func tf) in
+     let tfd = tf.tf_fdecl in 
+     let params_point = set_parameters_point tfd in
+     let out_params = set_out_params tfd in
+     [
+        "" ^ slfunc;
+         sprintf "cc_enclave_result_t ecall_%s (" tfd.fname;
+         "    uint8_t* in_buf,";
+         "    size_t in_buf_size,";
+@@ -396,8 +397,7 @@ let gen_trusted(ec : enclave_content) =
+             "    (cc_ecall_func_t) ecall_unregister_shared_memory,";
+             "    " ^ concat ",\n    "
+                 (List.map (fun (tf) ->
+-                    sprintf "(cc_ecall_func_t) ecall_%s" tf.tf_fdecl.fname)
+-                (List.filter (fun tf -> not tf.tf_is_switchless) trust_funcs));
+                    sprintf "(cc_ecall_func_t) ecall_%s" tf.tf_fdecl.fname) trust_funcs);
+             "};";
+             "";
+             "size_t ecall_table_size = CC_ARRAY_LEN(cc_ecall_tables);\n";
+diff --git a/tools/codegener/Genuntrust.ml b/tools/codegener/Genuntrust.ml
+index 6fb4967..8bc8e03 100644
+--- a/tools/codegener/Genuntrust.ml
+++ b/tools/codegener/Genuntrust.ml
+@@ -80,6 +80,40 @@ let set_call_user_func (fd : func_decl) =
+         "}";
+     ]
+ 
+let sl_async_set_call_user_func (fd : func_decl) = (* emits the common-ecall fallback used by <name>_async *)
+    [
+        "/* Call the cc_enclave function */";
+        "if (!enclave) {";
+        "    ret = CC_ERROR_BAD_PARAMETERS;";
+        "    goto exit;";
+        "}";
+        "if (pthread_rwlock_rdlock(&enclave->rwlock)) {";
+        "    ret = CC_ERROR_BUSY;";
+        "    goto exit;";
+        "}";
+        "if (!enclave->list_ops_node || !enclave->list_ops_node->ops_desc ||";
+        "         !enclave->list_ops_node->ops_desc->ops ||";
+        "         !enclave->list_ops_node->ops_desc->ops->cc_ecall_enclave) {"; (* read lock is held: unlock before exit *)
+        "    pthread_rwlock_unlock(&enclave->rwlock);"; "    ret = CC_ERROR_BAD_PARAMETERS;";
+        "    goto exit;";
+        "}";
+        "if ((ret = enclave->list_ops_node->ops_desc->ops->cc_ecall_enclave(";
+        "         enclave,";
+        sprintf "         fid_sl_async_%s," fd.fname;
+        "         in_buf,";
+        "         in_buf_size,";
+        "         out_buf,";
+        "         out_buf_size,";
+        "         &ms,";
+        "         &ocall_table)) != CC_SUCCESS) {";
+        "    pthread_rwlock_unlock(&enclave->rwlock);";
+        "    goto exit; }";
+        "if (pthread_rwlock_unlock(&enclave->rwlock)) {";
+        "    ret = CC_ERROR_BUSY;";
+        "    goto exit;";
+        "}";
+    ]
+
+ let set_ecall_func_arguments (fd : func_decl) =
+     [
+         sprintf "cc_enclave_result_t %s(\n    %s" fd.fname  (match fd.rtype with Void -> "cc_enclave_t *enclave" | _ -> "cc_enclave_t *enclave,\n    " ^ (get_tystr fd.rtype ^ "* retval"))
+@@ -100,7 +134,7 @@ let set_ecall_func_arguments (fd : func_decl) =
+ 
+ let set_sl_async_ecall_func_arguments (fd : func_decl) =
+     [
+-        sprintf "cc_enclave_result_t %s(\n    %s" (fd.fname ^ "_async")  "cc_enclave_t *enclave,\n    int *task_id"
+        sprintf "cc_enclave_result_t %s(\n    %s" (fd.fname ^ "_async") (match fd.rtype with Void -> "cc_enclave_t *enclave,\n    int *task_id" | _ -> "cc_enclave_t *enclave,\n    int *task_id,\n    " ^ (get_tystr fd.rtype ^ " *retval"))
+         ^ (if fd.plist <> [] then
+             ",\n    " ^
+             concat ",\n    "
+@@ -119,6 +153,7 @@ let set_sl_async_ecall_func_arguments (fd : func_decl) =
+ let set_sl_ecall_func (tf : trusted_func) =
+     let tfd = tf.tf_fdecl in
+     let init_point = set_init_pointer tfd in
+    let arg_size = set_args_size tfd in
+     let get_param_name (_, decl) = decl.identifier in
+     (*let is_ptr_type (ptype) =
+         match ptype with
+@@ -224,6 +259,61 @@ let set_sl_ecall_func (tf : trusted_func) =
+ 
+         "    pthread_rwlock_unlock(&enclave->rwlock);";
+         (if tfd.plist <> [] then "    free(params_buf);" else "");
+        "    if (ret != CC_ERROR_SWITCHLESS_ROLLBACK2COMMON) {\n        return ret;\n    }";
+        "\n    /* rollback to common invoking when async invoking fails */";
+        "    ret = CC_FAIL;";
+        "    *task_id = -1;";
+        "";
+        "    /* Init buffer and size  */";
+        "    size_t in_buf_size = 0;";
+        "    size_t out_buf_size = 0;";
+        "    uint8_t* in_buf = NULL;";
+        "    uint8_t* out_buf = NULL;";
+        "    uint32_t ms = TEE_SECE_AGENT_ID;";
+        sprintf "    %s_size_t args_size;" tfd.fname;
+        "";
+        "    /* Init pointer */";
+        if init_point <> ["";"";""] then
+            concat "\n" init_point
+        else "    /* There is no pointer */";
+        "";
+        "    memset(&args_size, 0, sizeof(args_size));";
+        "    /* Fill argments size */";
+        if arg_size <> [""] then
+            "    " ^ concat "\n    " (set_args_size tfd)
+        else "/* There is no argments size */";
+        "";
+        sprintf "    in_buf_size += size_to_aligned_size(sizeof(%s_size_t));"
+          tfd.fname;
+
+        "    " ^ concat "\n    " (set_data_in tfd);
+        "";
+
+        "    " ^ concat "\n    " (set_data_out tfd);
+        "";
+        "    /* Allocate in_buf and out_buf */";
+        "    in_buf = (uint8_t*)malloc(in_buf_size);";
+        "    out_buf = (uint8_t*)malloc(out_buf_size);";
+        "    if (in_buf == NULL || out_buf == NULL) {";
+        "        ret = CC_ERROR_OUT_OF_MEMORY;";
+        "        goto exit;";
+        "    }";
+
+        "";
+        "    " ^ concat "\n    " (set_in_memcpy tfd);
+        "";
+        "    " ^ concat "\n    " (sl_async_set_call_user_func tfd);
+        "";
+        "    " ^ concat "\n    " (set_out_memcpy tfd);
+        "    ret = CC_SUCCESS;";
+        "";
+
+        "exit:";
+        "    if (in_buf)";
+        "        free(in_buf);";
+        "    if (out_buf)";
+        "        free(out_buf);";
+        "";
+         "    return ret;";
+         "}";
+     ]
+-- 
+2.27.0
+
--- a/0053-asynchronous-switchless-example.patch
+++ b/0053-asynchronous-switchless-example.patch
@ -0,0 +1,860 @@
+From 508f9aed76b8f6788be60a8e39849ee6c1a32fcc Mon Sep 17 00:00:00 2001
+From: modric <wangyu283@huawei.com>
+Date: Wed, 9 Nov 2022 15:19:58 +0800
+Subject: [PATCH 4/4] asynchronous switchless example
+
+---
+ .../enclave/CMakeLists.txt                    |  16 +-
+ .../switchless_performance/enclave/enclave.c  |  77 ++
+ examples/switchless_performance/host/main.c   | 656 +++++++++++++++++-
+ .../switchless_performance/switchless.edl     |  13 +
+ 4 files changed, 750 insertions(+), 12 deletions(-)
+
+diff --git a/examples/switchless_performance/enclave/CMakeLists.txt b/examples/switchless_performance/enclave/CMakeLists.txt
+index 69aab4c..f7b72b1 100644
+--- a/examples/switchless_performance/enclave/CMakeLists.txt
+++ b/examples/switchless_performance/enclave/CMakeLists.txt
+@@ -62,7 +62,21 @@ set(COMMON_C_FLAGS "-W -Wall -Werror -fno-short-enums -fno-omit-frame-pointer -f
+ set(COMMON_C_LINK_FLAGS "-Wl,-z,now -Wl,-z,relro -Wl,-z,noexecstack -Wl,-nostdlib -nodefaultlibs -nostartfiles")
+ 
+ if(CC_GP)
+-    set(CMAKE_C_FLAGS "${COMMON_C_FLAGS} -march=armv8-a")
+    if (CMAKE_COMPILER_IS_GNUCC)
+        execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpfullversion -dumpversion
+                        OUTPUT_VARIABLE GCC_VERSION)
+        string(REGEX MATCHALL "[0-9]+" GCC_VERSION_COMPONENTS ${GCC_VERSION})
+        list(GET GCC_VERSION_COMPONENTS 0 GCC_MAJOR)
+        list(GET GCC_VERSION_COMPONENTS 1 GCC_MINOR)
+        set(GCC_VERSION "${GCC_MAJOR}.${GCC_MINOR}")
+    endif()
+
+    if (GCC_VERSION GREATER_EQUAL "9.4")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a -mno-outline-atomics")
+    else()
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a")
+    endif()
+
+     set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS} -s -fPIC")
+     set(CMAKE_SHARED_LINKER_FLAGS "${COMMON_C_LINK_FLAGS} -Wl,-s")
+ 
+diff --git a/examples/switchless_performance/enclave/enclave.c b/examples/switchless_performance/enclave/enclave.c
+index 1320e53..8b1466f 100644
+--- a/examples/switchless_performance/enclave/enclave.c
+++ b/examples/switchless_performance/enclave/enclave.c
+@@ -28,10 +28,87 @@ void test_toupper(char *buf, int len)
+     }
+ }
+ 
+static int i = 0;
+static int j = 0;
+
+ void ecall_empty(void)
+ {
+    printf("normal %d\n", __atomic_add_fetch(&i, 1, __ATOMIC_ACQ_REL));
+}
+
+int ecall_empty1(char *buf, int len)
+{
+    printf("normal1 %d\n", __atomic_add_fetch(&i, 1, __ATOMIC_ACQ_REL));
+
+    if (buf == NULL || len < 0) {
+        return -1;
+    }
+
+    for (int i = 0; i < len; ++i) {
+        if (buf[i] >= 'a' && buf[i] <= 'z') {
+            buf[i] = buf[i] - ('a' - 'A');
+        }
+    }
+
+    return 1;
+}
+
+int ecall_empty2(char *buf1, int len1, char *buf2, int len2)
+{
+    printf("normal2 %d\n", __atomic_add_fetch(&i, 1, __ATOMIC_ACQ_REL));
+
+    if (buf1 == NULL || len1 < 0 || buf2 == NULL || len2 < 0) {
+        return -1;
+    }
+
+    for (int i = 0; i < len1 && i < len2; ++i) { /* bounded by both lengths: never read past buf1 */
+        if (buf1[i] >= 'a' && buf1[i] <= 'z') {
+            buf2[i] = buf1[i] - ('a' - 'A');
+        } else {
+            buf2[i] = buf1[i];
+        }
+    }
+
+    return 2;
+ }
+ 
+ void ecall_empty_switchless(void)
+ {
+    printf("sl %d\n", __atomic_add_fetch(&j, 1, __ATOMIC_ACQ_REL));
+}
+
+int ecall_empty_switchless1(char *buf, int len)
+{
+    printf("sl1 %d\n", __atomic_add_fetch(&j, 1, __ATOMIC_ACQ_REL));
+
+    if (buf == NULL || len < 0) {
+        return -1;
+    }
+
+    for (int i = 0; i < len; ++i) {
+        if (buf[i] >= 'a' && buf[i] <= 'z') {
+            buf[i] = buf[i] - ('a' - 'A');
+        }
+    }
+
+    return 1;
+}
+
+int ecall_empty_switchless2(char *buf1, int len1, char *buf2, int len2)
+{
+    printf("sl2 %d\n", __atomic_add_fetch(&j, 1, __ATOMIC_ACQ_REL));
+
+    if (buf1 == NULL || len1 < 0 || buf2 == NULL || len2 < 0) {
+        return -1;
+    }
+
+    for (int i = 0; i < len1 && i < len2; ++i) { /* bounded by both lengths: never read past buf1 */
+        if (buf1[i] >= 'a' && buf1[i] <= 'z') {
+            buf2[i] = buf1[i] - ('a' - 'A');
+        } else {
+            buf2[i] = buf1[i];
+        }
+    }
+
+    return 2;
+ }
+diff --git a/examples/switchless_performance/host/main.c b/examples/switchless_performance/host/main.c
+index f80db25..ea4994f 100644
+--- a/examples/switchless_performance/host/main.c
+++ b/examples/switchless_performance/host/main.c
+@@ -63,23 +63,600 @@ void fini_enclave(cc_enclave_t *enclave)
+ 
+ void benchmark_ecall_empty(bool is_switchless, unsigned long nrepeats)
+ {
+-    struct timespec time_start;
+-    struct timespec time_end;
+-    struct timespec duration = {0, 0};
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+     cc_enclave_result_t(*ecall_fn)(cc_enclave_t *) = is_switchless ? ecall_empty_switchless : ecall_empty;
+ 
+-    clock_gettime(CLOCK_REALTIME, &time_start);
+    gettimeofday(&tval_before, NULL);
+     unsigned long tmp_nrepeats = nrepeats;
+     while (tmp_nrepeats--) {
+         ecall_fn(&g_enclave);
+     }
+-    clock_gettime(CLOCK_REALTIME, &time_end);
+ 
+-    duration.tv_sec += time_end.tv_sec - time_start.tv_sec;
+-    duration.tv_nsec += time_end.tv_nsec - time_start.tv_nsec;
+    gettimeofday(&tval_after, NULL);
+    timersub(&tval_after, &tval_before, &duration);
+ 
+-    printf("Repeating an %s empty ecall for %lu times takes %lu.%09lus\n",
+-        is_switchless ? "[switchless]" : "[ ordinary ]", nrepeats, duration.tv_sec, duration.tv_nsec);
+    printf("Repeating an %s empty ecall for %lu times takes %ld.%06lds\n",
+        is_switchless ? "[switchless]" : "[ ordinary ]", nrepeats, (long)duration.tv_sec, (long)duration.tv_usec);
+}
+
+/* ecall_empty_switchless */
+void benchmark_ecall_empty_sl_async(unsigned long nrepeats)
+{
+    cc_enclave_result_t ret_code;
+    cc_enclave_result_t ret;
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+    int processed_cursor = 0;
+    int retry_count = 0;
+
+    int *arr = (int *)calloc(nrepeats, sizeof(int));
+    if (arr == NULL) {
+        return;
+    }
+
+    // BEGIN
+    gettimeofday(&tval_before, NULL);
+
+    for (int i = 0; i < nrepeats; ++i) {
+        ret_code = ecall_empty_switchless_async(&g_enclave, &arr[i]);
+        if (ret_code != CC_SUCCESS) {
+            if (ret_code == CC_ERROR_SWITCHLESS_TASK_POOL_FULL) {
+                // The task pool is full. You should try again later.
+                --i;
+                ++retry_count;
+            } else {
+                // Asynchronous invocation failed
+                printf("Asynchronous invocation failed, ret=%x\n", ret_code);
+            }
+        }
+
+        ret = cc_sl_get_async_result(&g_enclave, arr[processed_cursor], NULL);
+        if (ret == CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+            // Invoking processing
+        } else if (ret == CC_SUCCESS) {
+            // Obtaining the result succeeded
+            processed_cursor++;
+        } else {
+            // Failed to obtain the result
+            processed_cursor++;
+        }
+    }
+
+    while (processed_cursor < nrepeats) {
+        ret = cc_sl_get_async_result(&g_enclave, arr[processed_cursor], NULL);
+        if (ret == CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+            // Invoking processing
+            continue;
+        } else if (ret == CC_SUCCESS) {
+            // Obtaining the result succeeded
+            processed_cursor++;
+        } else {
+            // Failed to obtain the result
+            processed_cursor++;
+        }
+    }
+
+    // END
+    gettimeofday(&tval_after, NULL);
+    timersub(&tval_after, &tval_before, &duration);
+
+    free(arr);
+
+    printf("retry_count:%d, processed_cursor:%d\n", retry_count, processed_cursor);
+    printf("Repeating an empty sl async ecall for %lu times takes %ld.%06lds\n", nrepeats,
+        (long int)duration.tv_sec, (long int)duration.tv_usec);
+}
+
+/*
+ * Benchmark: issue nrepeats asynchronous calls of ecall_empty_switchless and measure the
+ * time until every call has been accounted for.
+ *
+ * A call that returns CC_SUCCESS with a task id of -1 has already been executed through
+ * the rollback (common invocation) path: it is counted in rollback_count and has no
+ * asynchronous result to collect. Every other call occupies one slot of arr and is
+ * polled, in submission order, with cc_sl_get_async_result().
+ *
+ * nrepeats: number of calls to issue.
+ */
+void benchmark_ecall_empty_sl_async_rollback(unsigned long nrepeats)
+{
+    cc_enclave_result_t ret_code;
+    cc_enclave_result_t ret;
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+    int processed_cursor = 0;
+    int rollback_count = 0;
+    unsigned long submitted = 0;           /* number of arr slots that hold a submitted call */
+    unsigned long async_total = nrepeats;  /* calls that must be collected asynchronously */
+
+    int *arr = (int *)calloc(nrepeats, sizeof(int));
+    if (arr == NULL) {
+        printf("Error: calloc task id array failed.\n");
+        return;
+    }
+
+    // BEGIN
+    gettimeofday(&tval_before, NULL);
+
+    /*
+     * One pass submits at most one call and polls at most one result. Once everything
+     * has been submitted the loop only drains the remaining results.
+     */
+    while ((unsigned long)processed_cursor < async_total) {
+        if (submitted < async_total) {
+            ret_code = ecall_empty_switchless_async(&g_enclave, &arr[submitted]);
+            if (ret_code == CC_SUCCESS && arr[submitted] == -1) {
+                // Rolled back to a common call that already succeeded: reuse the slot.
+                --async_total;
+                rollback_count++;
+            } else {
+                if (ret_code != CC_SUCCESS) {
+                    // Asynchronous invocation failed
+                    printf("Asynchronous invocation failed, ret=%x\n", ret_code);
+                }
+                // A failed invocation still consumes its slot, as before.
+                ++submitted;
+            }
+        }
+
+        // Never poll a slot that has not been submitted yet.
+        if ((unsigned long)processed_cursor >= submitted) {
+            continue;
+        }
+
+        ret = cc_sl_get_async_result(&g_enclave, arr[processed_cursor], NULL);
+        if (ret != CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+            // Finished, successfully or not: move on to the next submitted call.
+            processed_cursor++;
+        }
+    }
+
+    // END
+    gettimeofday(&tval_after, NULL);
+    timersub(&tval_after, &tval_before, &duration);
+
+    free(arr);
+
+    printf("rollback_count:%d, processed_cursor:%d\n", rollback_count, processed_cursor);
+    printf("Repeating an empty sl async ecall rollback for %lu times takes %ld.%06lds\n", nrepeats,
+        (long int)duration.tv_sec, (long int)duration.tv_usec);
+}
+
+
+/* ecall_empty_switchless1 */
+/*
+ * Benchmark: issue nrepeats asynchronous calls of ecall_empty_switchless1, each working
+ * on its own 32-byte slice of one shared-memory buffer, and measure the time until every
+ * result has been collected.
+ *
+ * A call rejected with CC_ERROR_SWITCHLESS_TASK_POOL_FULL is retried on the same slot and
+ * counted in retry_count. Results are polled in submission order; a successful result is
+ * expected to carry retval 1 and to leave "AABBCCDD" in the slice.
+ *
+ * nrepeats: number of calls to issue.
+ */
+void benchmark_ecall_empty_sl_async1(unsigned long nrepeats)
+{
+    cc_enclave_result_t ret_code;
+    cc_enclave_result_t ret;
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+    int processed_cursor = 0;
+    int retry_count = 0;
+    const unsigned long one_share_buf_len = 32;
+    unsigned long submitted = 0;  /* number of arr slots that hold a submitted call */
+    int retval;
+
+    int *arr = (int *)calloc(nrepeats, sizeof(int));
+    if (arr == NULL) {
+        printf("Error: calloc task id array failed.\n");
+        return;
+    }
+
+    char *sharebuf = (char *)cc_malloc_shared_memory(&g_enclave, nrepeats * one_share_buf_len);
+    if (sharebuf == NULL) {
+        free(arr);
+        printf("Error: malloc shared memory failed.\n");
+        return;
+    }
+
+    // BEGIN
+    gettimeofday(&tval_before, NULL);
+
+    /*
+     * One pass submits at most one call and polls at most one result. Once everything
+     * has been submitted the loop only drains the remaining results.
+     */
+    while ((unsigned long)processed_cursor < nrepeats) {
+        if (submitted < nrepeats) {
+            char *call_buf = sharebuf + submitted * one_share_buf_len;
+
+            strcpy(call_buf, "aAbBcCdD");
+            ret_code = ecall_empty_switchless1_async(&g_enclave, &arr[submitted], NULL, call_buf,
+                sizeof("aAbBcCdD"));
+            if (ret_code == CC_ERROR_SWITCHLESS_TASK_POOL_FULL) {
+                // The task pool is full: retry the same slot on the next pass.
+                ++retry_count;
+            } else {
+                if (ret_code != CC_SUCCESS) {
+                    // Asynchronous invocation failed
+                    printf("Asynchronous invocation failed, ret=%x\n", ret_code);
+                }
+                // A failed invocation still consumes its slot, as before.
+                ++submitted;
+            }
+        }
+
+        // Never poll a slot that has not been submitted yet.
+        if ((unsigned long)processed_cursor >= submitted) {
+            continue;
+        }
+
+        ret = cc_sl_get_async_result(&g_enclave, arr[processed_cursor], &retval);
+        if (ret == CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+            // Still executing: poll the same slot again on the next pass.
+            continue;
+        }
+
+        if (ret == CC_SUCCESS) {
+            // Obtaining the result succeeded, and check the execution result.
+            char *done_buf = sharebuf + processed_cursor * one_share_buf_len;
+
+            if (retval != 1) {
+                printf("get result retval err:%d, index:%d\n", retval, processed_cursor);
+            }
+
+            if (strcmp("AABBCCDD", done_buf)) {
+                printf("get result buffer err:%s, index:%d\n", done_buf, processed_cursor);
+            }
+        }
+
+        // Finished, successfully or not: move on to the next submitted call.
+        processed_cursor++;
+    }
+
+    // END
+    gettimeofday(&tval_after, NULL);
+    timersub(&tval_after, &tval_before, &duration);
+
+    free(arr);
+
+    ret = cc_free_shared_memory(&g_enclave, sharebuf);
+    if (ret != CC_SUCCESS) {
+        printf("Error: free shared memory failed:%x.\n", ret);
+    }
+
+    printf("retry_count:%d, processed_cursor:%d\n", retry_count, processed_cursor);
+    printf("Repeating an empty sl async ecall [1] for %lu times takes %ld.%06lds\n", nrepeats,
+        (long int)duration.tv_sec, (long int)duration.tv_usec);
+}
+
+/*
+ * Benchmark: issue nrepeats asynchronous calls of ecall_empty_switchless1, each working
+ * on its own 32-byte slice of one shared-memory buffer, and measure the time until every
+ * call has been accounted for.
+ *
+ * A call that returns CC_SUCCESS with a task id of -1 has already been executed through
+ * the rollback (common invocation) path: its retval and buffer are checked immediately,
+ * it is counted in rollback_count and its slot is reused. Every other call is polled, in
+ * submission order, with cc_sl_get_async_result(). A successful call is expected to
+ * yield retval 1 and to leave "AABBCCDD" in the slice.
+ *
+ * nrepeats: number of calls to issue.
+ */
+void benchmark_ecall_empty_sl_async_rollback1(unsigned long nrepeats)
+{
+    cc_enclave_result_t ret_code;
+    cc_enclave_result_t ret;
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+    int processed_cursor = 0;
+    const unsigned long one_share_buf_len = 32;
+    int rollback_count = 0;
+    int retval;
+    unsigned long submitted = 0;           /* number of arr slots that hold a submitted call */
+    unsigned long async_total = nrepeats;  /* calls that must be collected asynchronously */
+
+    int *arr = (int *)calloc(nrepeats, sizeof(int));
+    if (arr == NULL) {
+        printf("Error: calloc task id array failed.\n");
+        return;
+    }
+
+    char *sharebuf = (char *)cc_malloc_shared_memory(&g_enclave, nrepeats * one_share_buf_len);
+    if (sharebuf == NULL) {
+        free(arr);
+        printf("Error: malloc shared memory failed.\n");
+        return;
+    }
+
+    // BEGIN
+    gettimeofday(&tval_before, NULL);
+
+    /*
+     * One pass submits at most one call and polls at most one result. Once everything
+     * has been submitted the loop only drains the remaining results.
+     */
+    while ((unsigned long)processed_cursor < async_total) {
+        if (submitted < async_total) {
+            char *call_buf = sharebuf + submitted * one_share_buf_len;
+
+            strcpy(call_buf, "aAbBcCdD");
+            ret_code = ecall_empty_switchless1_async(&g_enclave, &arr[submitted], &retval, call_buf,
+                sizeof("aAbBcCdD"));
+            if (ret_code == CC_SUCCESS && arr[submitted] == -1) {
+                /*
+                 * rollback to common invoking when asynchronous switchless fails, and the common call
+                 * is successful now, check the execution result.
+                 */
+                if (retval != 1) {
+                    printf("get result retval err:%d, index:%d\n", retval, (int)submitted);
+                }
+
+                if (strcmp("AABBCCDD", call_buf)) {
+                    printf("get result buffer err:%s, index:%d\n", call_buf, (int)submitted);
+                }
+
+                // Nothing to collect for this call: reuse the slot.
+                --async_total;
+                rollback_count++;
+            } else {
+                if (ret_code != CC_SUCCESS) {
+                    // Asynchronous invocation failed
+                    printf("Asynchronous invocation failed, ret=%x\n", ret_code);
+                }
+                // A failed invocation still consumes its slot, as before.
+                ++submitted;
+            }
+        }
+
+        // Never poll a slot that has not been submitted yet.
+        if ((unsigned long)processed_cursor >= submitted) {
+            continue;
+        }
+
+        ret = cc_sl_get_async_result(&g_enclave, arr[processed_cursor], &retval);
+        if (ret == CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+            // Still executing: poll the same slot again on the next pass.
+            continue;
+        }
+
+        if (ret == CC_SUCCESS) {
+            // Obtaining the result succeeded, check the execution result.
+            char *done_buf = sharebuf + processed_cursor * one_share_buf_len;
+
+            if (retval != 1) {
+                printf("get result retval err:%d, index:%d\n", retval, processed_cursor);
+            }
+
+            if (strcmp("AABBCCDD", done_buf)) {
+                printf("get result buffer err:%s, index:%d\n", done_buf, processed_cursor);
+            }
+        }
+
+        // Finished, successfully or not: move on to the next submitted call.
+        processed_cursor++;
+    }
+
+    // END
+    gettimeofday(&tval_after, NULL);
+    timersub(&tval_after, &tval_before, &duration);
+
+    free(arr);
+    ret = cc_free_shared_memory(&g_enclave, sharebuf);
+    if (ret != CC_SUCCESS) {
+        printf("Error: free shared memory failed:%x.\n", ret);
+    }
+
+    printf("rollback_count:%d, processed_cursor:%d\n", rollback_count, processed_cursor);
+    printf("Repeating an empty sl async ecall rollback [1] for %lu times takes %ld.%06lds\n", nrepeats,
+        (long int)duration.tv_sec, (long int)duration.tv_usec);
+}
+
+/* ecall_empty_switchless2 */
+/*
+ * Benchmark: issue nrepeats asynchronous calls of ecall_empty_switchless2 and measure the
+ * time until every result has been collected.
+ *
+ * Each call owns a 32-byte slice of one shared-memory buffer: the first 16 bytes are the
+ * input buffer and the last 16 bytes are the output buffer. A call rejected with
+ * CC_ERROR_SWITCHLESS_TASK_POOL_FULL is retried on the same slot and counted in
+ * retry_count. Results are polled in submission order; a successful result is expected
+ * to carry retval 2 and to leave "AABBCCDD" in the output half.
+ *
+ * nrepeats: number of calls to issue.
+ */
+void benchmark_ecall_empty_sl_async2(unsigned long nrepeats)
+{
+    cc_enclave_result_t ret_code;
+    cc_enclave_result_t ret;
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+    int processed_cursor = 0;
+    int retry_count = 0;
+    const unsigned long one_share_buf_len = 32;
+    const unsigned long half_one_share_buf_len = 16;
+    unsigned long submitted = 0;  /* number of arr slots that hold a submitted call */
+    int retval;
+
+    int *arr = (int *)calloc(nrepeats, sizeof(int));
+    if (arr == NULL) {
+        printf("Error: calloc task id array failed.\n");
+        return;
+    }
+
+    char *sharebuf = (char *)cc_malloc_shared_memory(&g_enclave, nrepeats * one_share_buf_len);
+    if (sharebuf == NULL) {
+        free(arr);
+        printf("Error: malloc shared memory failed.\n");
+        return;
+    }
+    memset(sharebuf, 0, nrepeats * one_share_buf_len);
+
+    // BEGIN
+    gettimeofday(&tval_before, NULL);
+
+    /*
+     * One pass submits at most one call and polls at most one result. Once everything
+     * has been submitted the loop only drains the remaining results.
+     */
+    while ((unsigned long)processed_cursor < nrepeats) {
+        if (submitted < nrepeats) {
+            char *in_buf = sharebuf + submitted * one_share_buf_len;
+            char *out_buf = in_buf + half_one_share_buf_len;
+
+            strcpy(in_buf, "aAbBcCdD");
+            ret_code = ecall_empty_switchless2_async(&g_enclave, &arr[submitted], NULL, in_buf,
+                sizeof("aAbBcCdD"), out_buf, sizeof("aAbBcCdD"));
+            if (ret_code == CC_ERROR_SWITCHLESS_TASK_POOL_FULL) {
+                // The task pool is full: retry the same slot on the next pass.
+                ++retry_count;
+            } else {
+                if (ret_code != CC_SUCCESS) {
+                    // Asynchronous invocation failed
+                    printf("Asynchronous invocation failed, ret=%x\n", ret_code);
+                }
+                // A failed invocation still consumes its slot, as before.
+                ++submitted;
+            }
+        }
+
+        // Never poll a slot that has not been submitted yet.
+        if ((unsigned long)processed_cursor >= submitted) {
+            continue;
+        }
+
+        ret = cc_sl_get_async_result(&g_enclave, arr[processed_cursor], &retval);
+        if (ret == CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+            // Still executing: poll the same slot again on the next pass.
+            continue;
+        }
+
+        if (ret == CC_SUCCESS) {
+            // Obtaining the result succeeded, check the execution result.
+            char *done_out = sharebuf + processed_cursor * one_share_buf_len + half_one_share_buf_len;
+
+            if (retval != 2) {
+                printf("get result retval err:%d, index:%d\n", retval, processed_cursor);
+            }
+
+            if (strcmp("AABBCCDD", done_out)) {
+                printf("get result buffer err:%s, index:%d\n", done_out, processed_cursor);
+            }
+        }
+
+        // Finished, successfully or not: move on to the next submitted call.
+        processed_cursor++;
+    }
+
+    // END
+    gettimeofday(&tval_after, NULL);
+    timersub(&tval_after, &tval_before, &duration);
+
+    free(arr);
+
+    ret = cc_free_shared_memory(&g_enclave, sharebuf);
+    if (ret != CC_SUCCESS) {
+        printf("Error: free shared memory failed:%x.\n", ret);
+    }
+
+    printf("retry_count:%d, processed_cursor:%d\n", retry_count, processed_cursor);
+    printf("Repeating an empty sl async ecall [2] for %lu times takes %ld.%06lds\n", nrepeats,
+        (long int)duration.tv_sec, (long int)duration.tv_usec);
+}
+
+/*
+ * Benchmark: issue nrepeats asynchronous calls of ecall_empty_switchless2 and measure the
+ * time until every call has been accounted for.
+ *
+ * Each call owns a 32-byte slice of one shared-memory buffer: the first 16 bytes are the
+ * input buffer and the last 16 bytes are the output buffer. A call that returns
+ * CC_SUCCESS with a task id of -1 has already been executed through the rollback (common
+ * invocation) path: its retval and output are checked immediately, it is counted in
+ * rollback_count and its slot is reused. Every other call is polled, in submission
+ * order, with cc_sl_get_async_result(). A successful call is expected to yield retval 2
+ * and to leave "AABBCCDD" in the output half.
+ *
+ * nrepeats: number of calls to issue.
+ */
+void benchmark_ecall_empty_sl_async_rollback2(unsigned long nrepeats)
+{
+    cc_enclave_result_t ret_code;
+    cc_enclave_result_t ret;
+    struct timeval tval_before;
+    struct timeval tval_after;
+    struct timeval duration;
+    int processed_cursor = 0;
+    const unsigned long one_share_buf_len = 32;
+    const unsigned long half_one_share_buf_len = 16;
+    int rollback_count = 0;
+    int retval;
+    unsigned long submitted = 0;           /* number of arr slots that hold a submitted call */
+    unsigned long async_total = nrepeats;  /* calls that must be collected asynchronously */
+
+    int *arr = (int *)calloc(nrepeats, sizeof(int));
+    if (arr == NULL) {
+        printf("Error: calloc task id array failed.\n");
+        return;
+    }
+
+    char *sharebuf = (char *)cc_malloc_shared_memory(&g_enclave, nrepeats * one_share_buf_len);
+    if (sharebuf == NULL) {
+        free(arr);
+        printf("Error: malloc shared memory failed.\n");
+        return;
+    }
+    // Zero the buffer so the output halves are NUL-terminated before they are compared or printed.
+    memset(sharebuf, 0, nrepeats * one_share_buf_len);
+
+    // BEGIN
+    gettimeofday(&tval_before, NULL);
+
+    /*
+     * One pass submits at most one call and polls at most one result. Once everything
+     * has been submitted the loop only drains the remaining results.
+     */
+    while ((unsigned long)processed_cursor < async_total) {
+        if (submitted < async_total) {
+            char *in_buf = sharebuf + submitted * one_share_buf_len;
+            char *out_buf = in_buf + half_one_share_buf_len;
+
+            strcpy(in_buf, "aAbBcCdD");
+            ret_code = ecall_empty_switchless2_async(&g_enclave, &arr[submitted], &retval, in_buf,
+                sizeof("aAbBcCdD"), out_buf, sizeof("aAbBcCdD"));
+            if (ret_code == CC_SUCCESS && arr[submitted] == -1) {
+                /*
+                 * rollback to common invoking when asynchronous switchless fails, and the common call
+                 * is successful now, check the execution result.
+                 */
+                if (retval != 2) {
+                    printf("get result retval err:%d, index:%d\n", retval, (int)submitted);
+                }
+
+                if (strcmp("AABBCCDD", out_buf)) {
+                    printf("get result buffer err:%s, index:%d\n", out_buf, (int)submitted);
+                }
+
+                // Nothing to collect for this call: reuse the slot.
+                --async_total;
+                rollback_count++;
+            } else {
+                if (ret_code != CC_SUCCESS) {
+                    // Asynchronous invocation failed
+                    printf("Asynchronous invocation failed, ret=%x\n", ret_code);
+                }
+                // A failed invocation still consumes its slot, as before.
+                ++submitted;
+            }
+        }
+
+        // Never poll a slot that has not been submitted yet.
+        if ((unsigned long)processed_cursor >= submitted) {
+            continue;
+        }
+
+        ret = cc_sl_get_async_result(&g_enclave, arr[processed_cursor], &retval);
+        if (ret == CC_ERROR_SWITCHLESS_ASYNC_TASK_UNFINISHED) {
+            // Still executing: poll the same slot again on the next pass.
+            continue;
+        }
+
+        if (ret == CC_SUCCESS) {
+            // Obtaining the result succeeded, check the execution result.
+            char *done_out = sharebuf + processed_cursor * one_share_buf_len + half_one_share_buf_len;
+
+            if (retval != 2) {
+                printf("get result retval err:%d, index:%d\n", retval, processed_cursor);
+            }
+
+            if (strcmp("AABBCCDD", done_out)) {
+                printf("get result buffer err:%s, index:%d\n", done_out, processed_cursor);
+            }
+        }
+
+        // Finished, successfully or not: move on to the next submitted call.
+        processed_cursor++;
+    }
+
+    // END
+    gettimeofday(&tval_after, NULL);
+    timersub(&tval_after, &tval_before, &duration);
+
+    free(arr);
+    ret = cc_free_shared_memory(&g_enclave, sharebuf);
+    if (ret != CC_SUCCESS) {
+        printf("Error: free shared memory failed:%x.\n", ret);
+    }
+
+    printf("rollback_count:%d, processed_cursor:%d\n", rollback_count, processed_cursor);
+    printf("Repeating an empty sl async ecall rollback [2] for %lu times takes %ld.%06lds\n", nrepeats,
+        (long int)duration.tv_sec, (long int)duration.tv_usec);
+ }
+ 
+ #define TEST_STR "switchless"
+@@ -106,11 +683,35 @@ void transfer_data_using_shared_memory()
+     }
+ }
+ 
+/*
+ * Runs each ordinary (non-switchless) ecall once and prints what it produced:
+ * ecall_empty1 with an in/out buffer, then ecall_empty2 with separate input and
+ * output buffers. Stops at the first call that does not return CC_SUCCESS.
+ */
+void onetime_normal(void)
+{
+    char inout_text[] = "aAbBcCdD";
+    char in_text[] = "aAbBcCdD";
+    char out_text[32] = {0};
+    cc_enclave_result_t status;
+    int call_retval;
+
+    status = ecall_empty1(&g_enclave, &call_retval, inout_text, sizeof(inout_text));
+    if (status == CC_SUCCESS) {
+        printf("buf:%s, retval:%d\n", inout_text, call_retval);
+
+        // The output length is three bytes shorter than the input buffer.
+        status = ecall_empty2(&g_enclave, &call_retval, in_text, sizeof(in_text), out_text,
+            sizeof(in_text) - 3);
+        if (status == CC_SUCCESS) {
+            printf("buf2:%s, retval:%d\n", out_text, call_retval);
+        } else {
+            printf("Error: ecall_empty2, ret:%x.\n", status);
+        }
+    } else {
+        printf("Error: ecall_empty1, ret:%x.\n", status);
+    }
+}
+
+ int main(void)
+ {
+     cc_sl_config_t sl_cfg = CC_USWITCHLESS_CONFIG_INITIALIZER;
+     sl_cfg.num_tworkers = 2; /* 2 tworkers */
+-    sl_cfg.sl_call_pool_size_qwords = 2; /* 2 * 64 tasks */
+    sl_cfg.sl_call_pool_size_qwords = 8; /* 8 * 64 tasks */
+    sl_cfg.rollback_to_common = false;
+     enclave_features_t features = {ENCLAVE_FEATURE_SWITCHLESS, (void *)&sl_cfg};
+ 
+     if (!init_enclave(&features)) {
+@@ -119,14 +720,47 @@ int main(void)
+     }
+ 
+     printf("\n1. Running a benchmark that compares [ordinary] and [switchless] ecall\n");
+-    unsigned long nrepeats = 100000;
+    unsigned long nrepeats = 10000;
+    benchmark_ecall_empty(false, nrepeats);
+    benchmark_ecall_empty(true, nrepeats);
+
+    benchmark_ecall_empty_sl_async(nrepeats);
+    benchmark_ecall_empty_sl_async1(nrepeats);
+    benchmark_ecall_empty_sl_async2(nrepeats);
+
+    printf("\n2. Transfer data using shared memory\n");
+    transfer_data_using_shared_memory();
+
+    printf("\n3. normal ecall\n");
+    onetime_normal();
+
+    fini_enclave(&g_enclave);
+
+#if 1
+    printf("\n=================================================\n");
+
+    sl_cfg.rollback_to_common = true;
+    if (!init_enclave(&features)) {
+        printf("Error: init enclave failed\n");
+        return -1;
+    }
+
+    printf("\n1. Running a benchmark that compares [ordinary] and [switchless] ecall\n");
+     benchmark_ecall_empty(false, nrepeats);
+     benchmark_ecall_empty(true, nrepeats);
+ 
+    benchmark_ecall_empty_sl_async_rollback(nrepeats);
+    benchmark_ecall_empty_sl_async_rollback1(nrepeats);
+    benchmark_ecall_empty_sl_async_rollback2(nrepeats);
+
+     printf("\n2. Transfer data using shared memory\n");
+     transfer_data_using_shared_memory();
+ 
+    printf("\n3. normal ecall\n");
+    onetime_normal();
+
+     fini_enclave(&g_enclave);
+#endif
+ 
+     return 0;
+ }
+diff --git a/examples/switchless_performance/switchless.edl b/examples/switchless_performance/switchless.edl
+index 344ee57..3c6f32e 100644
+--- a/examples/switchless_performance/switchless.edl
+++ b/examples/switchless_performance/switchless.edl
+@@ -16,8 +16,21 @@ enclave {
+     from "secgear_tswitchless.edl" import *;
+     trusted {
+         public void ecall_empty(void);
+
+        /* test [in, out] params */
+        public int ecall_empty1([in, out, size=len]char *buf, int len);
+
+        /* test [in] and [out] params */
+        public int ecall_empty2([in, size=len1]char *buf1, int len1, [out, size=len2]char *buf2, int len2);
+
+         public void ecall_empty_switchless(void) transition_using_threads;
+ 
+        /* test [in, out] params */
+        public int ecall_empty_switchless1([in, out, size=len]char *buf, int len) transition_using_threads;
+
+        /* test [in] and [out] params */
+        public int ecall_empty_switchless2([in, size=len1]char *buf1, int len1, [out, size=len2]char *buf2, int len2) transition_using_threads;
+
+         public void test_toupper([in, out, size=len]char *buf, int len) transition_using_threads;
+     };
+ };
+-- 
+2.27.0
+
--- a/secGear.spec
+++ b/secGear.spec
@ -1,6 +1,6 @@
 Name:		secGear
 Version:	0.1.0
-Release:	27
+Release:	28
 Summary:	secGear is an SDK to develop confidential computing apps based on hardware enclave features


@ -58,6 +58,10 @@ Patch45:        0046-fix-return-value.patch
 Patch46:        0047-del-print-uncontrol-form-string.patch
 Patch47:	0048-Delete-the-null-determination-of-out_buf-in-codegene.patch
 Patch48:        0049-support-switchless-feature.patch
+Patch49:        0050-switchless-schedule-policy.patch
+Patch50:        0051-asynchronous-switchless.patch
+Patch51:        0052-rollback-to-common-invoking-when-async-invoking-fail.patch
+Patch52:        0053-asynchronous-switchless-example.patch

 BuildRequires:	gcc python automake autoconf libtool
 BUildRequires:	glibc glibc-devel cmake ocaml-dune rpm gcc-c++
@ -176,6 +180,9 @@ popd
 systemctl restart rsyslog

 %changelog
+* Thu Nov 10 2022 wangyu <wangyu283@huawei.com> - 0.1.0-28
+- DESC: support switchless scheduling policies and asynchronous calls
+
 * Tue Oct 18 2022 zhengxiaoxiao <zhengxiaoxiao2@huawei.com> - 0.1.0-27
 - DESC: support switchless feature