diff --git a/0040-libhns-Add-support-for-creating-extended-CQ.patch b/0040-libhns-Add-support-for-creating-extended-CQ.patch new file mode 100644 index 0000000..ce6ab1b --- /dev/null +++ b/0040-libhns-Add-support-for-creating-extended-CQ.patch @@ -0,0 +1,346 @@ +From d8596eff4eb46d1db1b6066e3bbbd03976f49e58 Mon Sep 17 00:00:00 2001 +From: Xinhao Liu +Date: Mon, 7 Mar 2022 18:49:35 +0800 +Subject: libhns: Add support for creating extended CQ + +The driver supports ibv_create_cq_ex() to create extended CQ. But the +driver does not yet support the extended attributes specified by +attr->com_mask and attr->wc_flas. + +Signed-off-by: Xinhao Liu +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u.c | 1 + + providers/hns/hns_roce_u.h | 6 ++- + providers/hns/hns_roce_u_abi.h | 5 +++ + providers/hns/hns_roce_u_hw_v1.c | 20 +++++----- + providers/hns/hns_roce_u_hw_v2.c | 16 ++++---- + providers/hns/hns_roce_u_verbs.c | 63 ++++++++++++++++++++++---------- + 6 files changed, 72 insertions(+), 39 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 6eac4ff0..f8a647b8 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -66,6 +66,7 @@ static const struct verbs_context_ops hns_common_ops = { + .bind_mw = hns_roce_u_bind_mw, + .cq_event = hns_roce_u_cq_event, + .create_cq = hns_roce_u_create_cq, ++ .create_cq_ex = hns_roce_u_create_cq_ex, + .create_qp = hns_roce_u_create_qp, + .create_qp_ex = hns_roce_u_create_qp_ex, + .dealloc_mw = hns_roce_u_dealloc_mw, +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 2b4ba181..505e7498 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -236,7 +236,7 @@ struct hns_roce_pd { + }; + + struct hns_roce_cq { +- struct ibv_cq ibv_cq; ++ struct verbs_cq verbs_cq; + struct hns_roce_buf buf; + pthread_spinlock_t lock; + unsigned int cqn; +@@ -406,7 +406,7 @@ static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd) + + static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq) + { +- return container_of(ibv_cq, struct hns_roce_cq, ibv_cq); ++ return container_of(ibv_cq, struct hns_roce_cq, verbs_cq.cq); + } + + static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq) +@@ -447,6 +447,8 @@ int hns_roce_u_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw, + struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector); ++struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr); + + int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); + int hns_roce_u_destroy_cq(struct ibv_cq *cq); +diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h +index e56f9d35..333f977e 100644 +--- a/providers/hns/hns_roce_u_abi.h ++++ b/providers/hns/hns_roce_u_abi.h +@@ -39,8 +39,13 @@ + + DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, + empty, hns_roce_ib_alloc_pd_resp); ++ + DECLARE_DRV_CMD(hns_roce_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, + hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp); ++ ++DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, ++ hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp); ++ + DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, + empty, hns_roce_ib_alloc_ucontext_resp); + +diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c +index 28ad482c..d47cba0c 100644 +--- 
a/providers/hns/hns_roce_u_hw_v1.c ++++ b/providers/hns/hns_roce_u_hw_v1.c +@@ -161,10 +161,10 @@ static struct hns_roce_cqe *get_cqe(struct hns_roce_cq *cq, int entry) + + static void *get_sw_cqe(struct hns_roce_cq *cq, int n) + { +- struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe); ++ struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->verbs_cq.cq.cqe); + + return (!!(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S)) ^ +- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL; ++ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL; + } + + static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq) +@@ -210,7 +210,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq, + cur = wq->head - wq->tail; + pthread_spin_unlock(&cq->lock); + +- verbs_err(verbs_get_ctx(cq->ibv_cq.context), ++ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), + "wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n", + wq->head, wq->tail, wq->max_post, nreq); + +@@ -274,10 +274,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq, + if (!*cur_qp || + (local_qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->verbs_qp.qp.qp_num) { + +- *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context), ++ *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->verbs_cq.cq.context), + qpn & 0xffffff); + if (!*cur_qp) { +- verbs_err(verbs_get_ctx(cq->ibv_cq.context), ++ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), + PFX "can't find qp!\n"); + return CQ_POLL_ERR; + } +@@ -317,7 +317,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq, + if (roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, + CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) { +- verbs_err(verbs_get_ctx(cq->ibv_cq.context), ++ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context), + PFX "error cqe!\n"); + hns_roce_handle_error_cqe(cqe, wc); + return CQ_OK; +@@ -599,21 +599,21 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + uint32_t prod_index; + uint8_t owner_bit = 0; + struct hns_roce_cqe *cqe, *dest; +- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); ++ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); + + for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); + ++prod_index) +- if (prod_index == cq->cons_index + cq->ibv_cq.cqe) ++ if (prod_index == cq->cons_index + cq->verbs_cq.cq.cqe) + break; + + while ((int) --prod_index - (int) cq->cons_index >= 0) { +- cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe); ++ cqe = get_cqe(cq, prod_index & cq->verbs_cq.cq.cqe); + if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) & 0xffffff) == qpn) { + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, +- (prod_index + nfreed) & cq->ibv_cq.cqe); ++ (prod_index + nfreed) & cq->verbs_cq.cq.cqe); + owner_bit = roce_get_bit(dest->cqe_byte_4, + CQE_BYTE_4_OWNER_S); + memcpy(dest, cqe, sizeof(*cqe)); +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index bfd98760..07f3596d 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -189,10 +189,10 @@ static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) + + static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n) + { +- struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe); ++ struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->verbs_cq.cq.cqe); + +- return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ? 
+- cqe : NULL; ++ return (hr_reg_read(cqe, CQE_OWNER) ^ ++ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL; + } + + static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) +@@ -556,7 +556,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + struct hns_roce_qp **cur_qp, struct ibv_wc *wc) + { +- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); ++ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); + struct hns_roce_srq *srq = NULL; + struct hns_roce_v2_cqe *cqe; + uint8_t opcode; +@@ -1356,15 +1356,15 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + uint16_t wqe_index; + uint32_t prod_index; + struct hns_roce_v2_cqe *cqe, *dest; +- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); ++ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); + + for (prod_index = cq->cons_index; get_sw_cqe_v2(cq, prod_index); + ++prod_index) +- if (prod_index > cq->cons_index + cq->ibv_cq.cqe) ++ if (prod_index > cq->cons_index + cq->verbs_cq.cq.cqe) + break; + + while ((int) --prod_index - (int) cq->cons_index >= 0) { +- cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe); ++ cqe = get_cqe_v2(cq, prod_index & cq->verbs_cq.cq.cqe); + if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) { + is_recv_cqe = hr_reg_read(cqe, CQE_S_R); + +@@ -1375,7 +1375,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + ++nfreed; + } else if (nfreed) { + dest = get_cqe_v2(cq, +- (prod_index + nfreed) & cq->ibv_cq.cqe); ++ (prod_index + nfreed) & cq->verbs_cq.cq.cqe); + owner_bit = hr_reg_read(dest, CQE_OWNER); + memcpy(dest, cqe, cq->cqe_size); + hr_reg_write_bool(dest, CQE_OWNER, owner_bit); +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 3cc9e0c2..a993c39a 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -276,12 +276,17 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw) + return 0; + } + +-static int hns_roce_verify_cq(int *cqe, struct hns_roce_context *context) ++static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, ++ struct hns_roce_context *context) + { +- if (*cqe < 1 || *cqe > context->max_cqe) ++ if (!attr->cqe || attr->cqe > context->max_cqe) + return -EINVAL; + +- *cqe = max((uint64_t)HNS_ROCE_MIN_CQE_NUM, roundup_pow_of_two(*cqe)); ++ if (attr->comp_mask || attr->wc_flags) ++ return -EOPNOTSUPP; ++ ++ attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, ++ roundup_pow_of_two(attr->cqe)); + + return 0; + } +@@ -297,25 +302,25 @@ static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq) + } + + static int exec_cq_create_cmd(struct ibv_context *context, +- struct hns_roce_cq *cq, int cqe, +- struct ibv_comp_channel *channel, int comp_vector) ++ struct hns_roce_cq *cq, ++ struct ibv_cq_init_attr_ex *attr) + { ++ struct hns_roce_create_cq_ex_resp resp_ex = {}; + struct hns_roce_ib_create_cq_resp *resp_drv; +- struct hns_roce_create_cq_resp resp = {}; ++ struct hns_roce_create_cq_ex cmd_ex = {}; + struct hns_roce_ib_create_cq *cmd_drv; +- struct hns_roce_create_cq cmd = {}; + int ret; + +- cmd_drv = &cmd.drv_payload; +- resp_drv = &resp.drv_payload; ++ cmd_drv = &cmd_ex.drv_payload; ++ resp_drv = &resp_ex.drv_payload; + + cmd_drv->buf_addr = (uintptr_t)cq->buf.buf; + cmd_drv->db_addr = (uintptr_t)cq->db; + cmd_drv->cqe_size = (uintptr_t)cq->cqe_size; + +- ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector, +- &cq->ibv_cq, 
&cmd.ibv_cmd, sizeof(cmd), +- &resp.ibv_resp, sizeof(resp)); ++ ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq, ++ &cmd_ex.ibv_cmd, sizeof(cmd_ex), ++ &resp_ex.ibv_resp, sizeof(resp_ex), 0); + if (ret) + return ret; + +@@ -325,16 +330,15 @@ static int exec_cq_create_cmd(struct ibv_context *context, + return 0; + } + +-struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, +- struct ibv_comp_channel *channel, +- int comp_vector) ++static struct ibv_cq_ex *create_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *attr) + { + struct hns_roce_device *hr_dev = to_hr_dev(context->device); + struct hns_roce_context *hr_ctx = to_hr_ctx(context); + struct hns_roce_cq *cq; + int ret; + +- ret = hns_roce_verify_cq(&cqe, hr_ctx); ++ ret = verify_cq_create_attr(attr, hr_ctx); + if (ret) + goto err; + +@@ -348,7 +352,7 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, + if (ret) + goto err_lock; + +- cq->cq_depth = cqe; ++ cq->cq_depth = attr->cqe; + cq->cqe_size = hr_ctx->cqe_size; + + ret = hns_roce_alloc_cq_buf(cq); +@@ -363,13 +367,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, + + *cq->db = 0; + +- ret = exec_cq_create_cmd(context, cq, cqe, channel, comp_vector); ++ ret = exec_cq_create_cmd(context, cq, attr); + if (ret) + goto err_cmd; + + cq->arm_sn = 1; + +- return &cq->ibv_cq; ++ return &cq->verbs_cq.cq_ex; + + err_cmd: + if (hr_dev->hw_version != HNS_ROCE_HW_VER1) +@@ -387,6 +391,27 @@ err: + return NULL; + } + ++struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, ++ int comp_vector) ++{ ++ struct ibv_cq_ex *cq; ++ struct ibv_cq_init_attr_ex attr = { ++ .cqe = cqe, ++ .channel = channel, ++ .comp_vector = comp_vector, ++ }; ++ ++ cq = create_cq(context, &attr); ++ return cq ? ibv_cq_ex_to_cq(cq) : NULL; ++} ++ ++struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *attr) ++{ ++ return create_cq(context, attr); ++} ++ + void hns_roce_u_cq_event(struct ibv_cq *cq) + { + to_hr_cq(cq)->arm_sn++; +-- +2.30.0 + diff --git a/0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch b/0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch new file mode 100644 index 0000000..f816bcb --- /dev/null +++ b/0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch @@ -0,0 +1,415 @@ +From 0464e0cb0416d679aba3b58261bbd2cadb74fd03 Mon Sep 17 00:00:00 2001 +From: Xinhao Liu +Date: Mon, 7 Mar 2022 18:49:36 +0800 +Subject: libhns: Extended CQ supports the new polling mechanism + +ofed provides new polling APIs for extended CQ. With the new APIs, users +can poll the extended CQ more efficiently. 
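+
+A minimal usage sketch of the flow enabled by this patch and the
+previous one, assuming an already opened device context ctx; the CQE
+depth, the wc_flags value and the consume() helper are illustrative
+only, not part of the driver:
+
+    struct ibv_cq_init_attr_ex cq_attr = {
+        .cqe = 64,
+        .comp_vector = 0,
+        .wc_flags = IBV_WC_EX_WITH_QP_NUM,
+    };
+    struct ibv_poll_cq_attr poll_attr = {};
+    struct ibv_cq_ex *cq_ex;
+    int ret;
+
+    cq_ex = ibv_create_cq_ex(ctx, &cq_attr);
+    if (!cq_ex)
+        return errno;
+
+    /* ... post work requests and wait for completions ... */
+
+    ret = ibv_start_poll(cq_ex, &poll_attr);
+    if (ret)
+        /* empty CQ or error; ibv_end_poll() must not be called */
+        return ret == ENOENT ? 0 : ret;
+
+    do {
+        consume(cq_ex->wr_id, cq_ex->status,
+                ibv_wc_read_qp_num(cq_ex));
+        ret = ibv_next_poll(cq_ex);
+    } while (!ret);
+
+    ibv_end_poll(cq_ex);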
+ +Signed-off-by: Xinhao Liu +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u.h | 1 + + providers/hns/hns_roce_u_hw_v2.c | 319 +++++++++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.h | 1 + + providers/hns/hns_roce_u_verbs.c | 18 +- + 4 files changed, 337 insertions(+), 2 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 505e7498..70ac6e5b 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -247,6 +247,7 @@ struct hns_roce_cq { + int arm_sn; + unsigned long flags; + unsigned int cqe_size; ++ struct hns_roce_v2_cqe *cqe; + }; + + struct hns_roce_idx_que { +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 07f3596d..081ab1f3 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1657,6 +1657,325 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + return ret; + } + ++static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, ++ struct hns_roce_cq *cq) ++{ ++ struct hns_roce_wq *wq = &hr_qp->sq; ++ ++ if (hr_qp->sq_signal_bits) ++ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); ++ ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ ++wq->tail; ++} ++ ++static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx, ++ struct hns_roce_cq *cq) ++{ ++ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; ++ hns_roce_free_srq_wqe(srq, wqe_idx); ++} ++ ++static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq) ++{ ++ struct hns_roce_wq *wq = &hr_qp->rq; ++ ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ ++wq->tail; ++} ++ ++static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, ++ struct hns_roce_cq *cq) ++{ ++ struct hns_roce_v2_cqe *cqe = cq->cqe; ++ struct hns_roce_srq *srq = NULL; ++ uint32_t wqe_idx; ++ ++ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); ++ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { ++ cqe_proc_sq(qp, wqe_idx, cq); ++ } else { ++ if (get_srq_from_cqe(cqe, ctx, qp, &srq)) ++ return V2_CQ_POLL_ERR; ++ ++ if (srq) ++ cqe_proc_srq(srq, wqe_idx, cq); ++ else ++ cqe_proc_rq(qp, cq); ++ } ++ return 0; ++} ++ ++static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status) ++{ ++ int i; ++ ++ static const struct { ++ unsigned int cqe_status; ++ enum ibv_wc_status wc_status; ++ } map[] = { ++ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, ++ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, ++ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, ++ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR }, ++ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR }, ++ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR }, ++ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR }, ++ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, ++ IBV_WC_RETRY_EXC_ERR }, ++ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, ++ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, ++ }; ++ ++ cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR; ++ for (i = 0; i < ARRAY_SIZE(map); i++) { ++ if (status == map[i].cqe_status) { ++ cq->verbs_cq.cq_ex.status = map[i].wc_status; ++ break; ++ } ++ } ++} ++ 
++static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) ++{ ++ struct hns_roce_qp *qp = NULL; ++ struct hns_roce_v2_cqe *cqe; ++ uint8_t status; ++ uint32_t qpn; ++ ++ cqe = next_cqe_sw_v2(cq); ++ if (!cqe) ++ return ENOENT; ++ ++ ++cq->cons_index; ++ udma_from_device_barrier(); ++ ++ cq->cqe = cqe; ++ qpn = hr_reg_read(cqe, CQE_LCL_QPN); ++ ++ qp = hns_roce_v2_find_qp(ctx, qpn); ++ if (!qp) ++ return V2_CQ_POLL_ERR; ++ ++ if (cqe_proc_wq(ctx, qp, cq)) ++ return V2_CQ_POLL_ERR; ++ ++ status = hr_reg_read(cqe, CQE_STATUS); ++ ++ /* ++ * once a cqe in error status, the driver needs to help the HW to ++ * generated flushed cqes for all subsequent wqes ++ */ ++ if (status != HNS_ROCE_V2_CQE_SUCCESS) { ++ handle_error_cqe_ex(cq, status); ++ return hns_roce_flush_cqe(qp, status); ++ } ++ ++ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; ++ ++ return V2_CQ_OK; ++} ++ ++static int wc_start_poll_cq(struct ibv_cq_ex *current, ++ struct ibv_poll_cq_attr *attr) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ struct hns_roce_context *ctx = to_hr_ctx(current->context); ++ int err; ++ ++ if (attr->comp_mask) ++ return EINVAL; ++ ++ pthread_spin_lock(&cq->lock); ++ ++ err = wc_poll_cqe(ctx, cq); ++ if (err != V2_CQ_OK) ++ pthread_spin_unlock(&cq->lock); ++ ++ return err; ++} ++ ++static int wc_next_poll_cq(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ struct hns_roce_context *ctx = to_hr_ctx(current->context); ++ int err; ++ ++ err = wc_poll_cqe(ctx, cq); ++ if (err != V2_CQ_OK) ++ return err; ++ ++ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) ++ *cq->db = cq->cons_index & RECORD_DB_CI_MASK; ++ else ++ update_cq_db(ctx, cq); ++ ++ return 0; ++} ++ ++static void wc_end_poll_cq(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ struct hns_roce_context *ctx = to_hr_ctx(current->context); ++ ++ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) ++ *cq->db = cq->cons_index & RECORD_DB_CI_MASK; ++ else ++ update_cq_db(ctx, cq); ++ ++ pthread_spin_unlock(&cq->lock); ++} ++ ++static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE); ++ ++ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ) ++ return wc_send_op_map[opcode]; ++ else ++ return wc_rcv_op_map[opcode]; ++} ++ ++static uint32_t wc_read_vendor_err(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ ++ return hr_reg_read(cq->cqe, CQE_SUB_STATUS); ++} ++ ++static uint32_t wc_read_byte_len(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ ++ return le32toh(cq->cqe->byte_cnt); ++} ++ ++static __be32 wc_read_imm_data(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ ++ if (hr_reg_read(cq->cqe, CQE_OPCODE) == HNS_ROCE_RECV_OP_SEND_WITH_INV) ++ /* This is returning invalidate_rkey which is in host order, see ++ * ibv_wc_read_invalidated_rkey. 
++ */ ++ return (__force __be32)le32toh(cq->cqe->rkey); ++ ++ return htobe32(le32toh(cq->cqe->immtdata)); ++} ++ ++static uint32_t wc_read_qp_num(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ ++ return hr_reg_read(cq->cqe, CQE_LCL_QPN); ++} ++ ++static uint32_t wc_read_src_qp(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ ++ return hr_reg_read(cq->cqe, CQE_RMT_QPN); ++} ++ ++static unsigned int get_wc_flags_for_sq(uint8_t opcode) ++{ ++ switch (opcode) { ++ case HNS_ROCE_SQ_OP_SEND_WITH_IMM: ++ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM: ++ return IBV_WC_WITH_IMM; ++ case HNS_ROCE_SQ_OP_LOCAL_INV: ++ return IBV_WC_WITH_INV; ++ default: ++ return 0; ++ } ++} ++ ++static unsigned int get_wc_flags_for_rq(uint8_t opcode) ++{ ++ switch (opcode) { ++ case HNS_ROCE_RECV_OP_RDMA_WRITE_IMM: ++ case HNS_ROCE_RECV_OP_SEND_WITH_IMM: ++ return IBV_WC_WITH_IMM; ++ case HNS_ROCE_RECV_OP_SEND_WITH_INV: ++ return IBV_WC_WITH_INV; ++ default: ++ return 0; ++ } ++} ++ ++static unsigned int wc_read_wc_flags(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE); ++ unsigned int wc_flags; ++ ++ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ) { ++ wc_flags = get_wc_flags_for_sq(opcode); ++ } else { ++ wc_flags = get_wc_flags_for_rq(opcode); ++ wc_flags |= hr_reg_read(cq->cqe, CQE_GRH) ? IBV_WC_GRH : 0; ++ } ++ ++ return wc_flags; ++} ++ ++static uint32_t wc_read_slid(struct ibv_cq_ex *current) ++{ ++ return 0; ++} ++ ++static uint8_t wc_read_sl(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ ++ return (uint8_t)hr_reg_read(cq->cqe, CQE_SL); ++} ++ ++static uint8_t wc_read_dlid_path_bits(struct ibv_cq_ex *current) ++{ ++ return 0; ++} ++ ++static uint16_t wc_read_cvlan(struct ibv_cq_ex *current) ++{ ++ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); ++ ++ return hr_reg_read(cq->cqe, CQE_VID_VLD) ? 
++ hr_reg_read(cq->cqe, CQE_VID) : 0; ++} ++ ++void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags) ++{ ++ cq_ex->start_poll = wc_start_poll_cq; ++ cq_ex->next_poll = wc_next_poll_cq; ++ cq_ex->end_poll = wc_end_poll_cq; ++ cq_ex->read_opcode = wc_read_opcode; ++ cq_ex->read_vendor_err = wc_read_vendor_err; ++ cq_ex->read_wc_flags = wc_read_wc_flags; ++ ++ if (wc_flags & IBV_WC_EX_WITH_BYTE_LEN) ++ cq_ex->read_byte_len = wc_read_byte_len; ++ if (wc_flags & IBV_WC_EX_WITH_IMM) ++ cq_ex->read_imm_data = wc_read_imm_data; ++ if (wc_flags & IBV_WC_EX_WITH_QP_NUM) ++ cq_ex->read_qp_num = wc_read_qp_num; ++ if (wc_flags & IBV_WC_EX_WITH_SRC_QP) ++ cq_ex->read_src_qp = wc_read_src_qp; ++ if (wc_flags & IBV_WC_EX_WITH_SLID) ++ cq_ex->read_slid = wc_read_slid; ++ if (wc_flags & IBV_WC_EX_WITH_SL) ++ cq_ex->read_sl = wc_read_sl; ++ if (wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) ++ cq_ex->read_dlid_path_bits = wc_read_dlid_path_bits; ++ if (wc_flags & IBV_WC_EX_WITH_CVLAN) ++ cq_ex->read_cvlan = wc_read_cvlan; ++} ++ + const struct hns_roce_u_hw hns_roce_u_hw_v2 = { + .hw_version = HNS_ROCE_HW_VER2, + .hw_ops = { +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 92e5f1a4..0068f4fe 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -337,5 +337,6 @@ struct hns_roce_ud_sq_wqe { + #define MAX_SERVICE_LEVEL 0x7 + + void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); ++void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags); + + #endif /* _HNS_ROCE_U_HW_V2_H */ +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index a993c39a..9ea8a6d3 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -276,13 +276,21 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw) + return 0; + } + ++enum { ++ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | ++ IBV_WC_EX_WITH_CVLAN, ++}; ++ + static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + struct hns_roce_context *context) + { + if (!attr->cqe || attr->cqe > context->max_cqe) + return -EINVAL; + +- if (attr->comp_mask || attr->wc_flags) ++ if (attr->comp_mask) ++ return -EOPNOTSUPP; ++ ++ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) + return -EOPNOTSUPP; + + attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, +@@ -409,7 +417,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe, + struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr) + { +- return create_cq(context, attr); ++ struct ibv_cq_ex *cq; ++ ++ cq = create_cq(context, attr); ++ if (cq) ++ hns_roce_attach_cq_ex_ops(cq, attr->wc_flags); ++ ++ return cq; + } + + void hns_roce_u_cq_event(struct ibv_cq *cq) +-- +2.30.0 + diff --git a/0042-libhns-Optimize-the-error-handling-of-CQE.patch b/0042-libhns-Optimize-the-error-handling-of-CQE.patch new file mode 100644 index 0000000..4ee4882 --- /dev/null +++ b/0042-libhns-Optimize-the-error-handling-of-CQE.patch @@ -0,0 +1,160 @@ +From 2d48954e9b2617cb48f7d5ba47a10ceda4e556ff Mon Sep 17 00:00:00 2001 +From: Xinhao Liu +Date: Mon, 7 Mar 2022 18:49:37 +0800 +Subject: libhns: Optimize the error handling of CQE + +Separate the acquisition of wc->status and wc->vendor_err to make the logic +of error handling clearer. 
+ +Signed-off-by: Xinhao Liu +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 81 ++++++++------------------------ + 1 file changed, 19 insertions(+), 62 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 081ab1f3..2804450d 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -146,13 +146,13 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr, + return 0; + } + +-static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, +- uint8_t status) ++static enum ibv_wc_status get_wc_status(uint8_t status) + { + static const struct { + unsigned int cqe_status; + enum ibv_wc_status wc_status; + } map[] = { ++ { HNS_ROCE_V2_CQE_SUCCESS, IBV_WC_SUCCESS }, + { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, + { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, + { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, +@@ -169,17 +169,12 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, + }; + +- int i; +- +- wc->status = IBV_WC_GENERAL_ERR; +- for (i = 0; i < ARRAY_SIZE(map); i++) { +- if (status == map[i].cqe_status) { +- wc->status = map[i].wc_status; +- break; +- } ++ for (int i = 0; i < ARRAY_SIZE(map); i++) { ++ if (status == map[i].cqe_status) ++ return map[i].wc_status; + } + +- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); ++ return IBV_WC_GENERAL_ERR; + } + + static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) +@@ -581,7 +576,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + return V2_CQ_POLL_ERR; + } + +- status = hr_reg_read(cqe, CQE_STATUS); + opcode = hr_reg_read(cqe, CQE_OPCODE); + is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ; + if (is_send) { +@@ -603,18 +597,18 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + + wc->qp_num = qpn; + ++ status = hr_reg_read(cqe, CQE_STATUS); ++ wc->status = get_wc_status(status); ++ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); ++ ++ if (status == HNS_ROCE_V2_CQE_SUCCESS) ++ return V2_CQ_OK; ++ + /* + * once a cqe in error status, the driver needs to help the HW to + * generated flushed cqes for all subsequent wqes + */ +- if (status != HNS_ROCE_V2_CQE_SUCCESS) { +- handle_error_cqe(cqe, wc, status); +- return hns_roce_flush_cqe(*cur_qp, status); +- } +- +- wc->status = IBV_WC_SUCCESS; +- +- return V2_CQ_OK; ++ return hns_roce_flush_cqe(*cur_qp, status); + } + + static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, +@@ -1706,40 +1700,6 @@ static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, + return 0; + } + +-static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status) +-{ +- int i; +- +- static const struct { +- unsigned int cqe_status; +- enum ibv_wc_status wc_status; +- } map[] = { +- { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, +- { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, +- { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, +- { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR }, +- { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR }, +- { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR }, +- { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR }, +- { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR }, +- { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR }, +- { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, 
IBV_WC_REM_OP_ERR }, +- { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, +- IBV_WC_RETRY_EXC_ERR }, +- { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, +- { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, +- { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, +- }; +- +- cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR; +- for (i = 0; i < ARRAY_SIZE(map); i++) { +- if (status == map[i].cqe_status) { +- cq->verbs_cq.cq_ex.status = map[i].wc_status; +- break; +- } +- } +-} +- + static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) + { + struct hns_roce_qp *qp = NULL; +@@ -1765,19 +1725,16 @@ static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) + return V2_CQ_POLL_ERR; + + status = hr_reg_read(cqe, CQE_STATUS); ++ cq->verbs_cq.cq_ex.status = get_wc_status(status); ++ ++ if (status == HNS_ROCE_V2_CQE_SUCCESS) ++ return V2_CQ_OK; + + /* + * once a cqe in error status, the driver needs to help the HW to + * generated flushed cqes for all subsequent wqes + */ +- if (status != HNS_ROCE_V2_CQE_SUCCESS) { +- handle_error_cqe_ex(cq, status); +- return hns_roce_flush_cqe(qp, status); +- } +- +- cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; +- +- return V2_CQ_OK; ++ return hns_roce_flush_cqe(qp, status); + } + + static int wc_start_poll_cq(struct ibv_cq_ex *current, +-- +2.30.0 + diff --git a/0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch b/0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch new file mode 100644 index 0000000..2d90158 --- /dev/null +++ b/0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch @@ -0,0 +1,323 @@ +From 9dd7b55957ccc720a6844613af9d43680d8fbaad Mon Sep 17 00:00:00 2001 +From: Xinhao Liu +Date: Mon, 7 Mar 2022 18:49:38 +0800 +Subject: libhns: Refactor hns roce v2 poll one() and wc poll cqe() + +hns_roce_v2_poll_one() and wc_poll_cqe() have a lot of repetitive code. +Aggregating the repetitive parts of these two functions into one function +hns_roce_poll_one() can reduce the repetitive code. 
+ +Signed-off-by: Xinhao Liu +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 228 +++++++++++++++---------------- + 1 file changed, 107 insertions(+), 121 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 2804450d..42a77151 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -285,6 +285,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, + + static void hns_roce_update_sq_db(struct hns_roce_context *ctx, + struct hns_roce_qp *qp) ++ + { + struct hns_roce_db sq_db = {}; + +@@ -548,21 +549,101 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + wc->opcode = wc_send_op_map[opcode]; + } + +-static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, +- struct hns_roce_qp **cur_qp, struct ibv_wc *wc) ++static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, ++ struct hns_roce_cq *cq) + { +- struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context); ++ struct hns_roce_wq *wq = &hr_qp->sq; ++ ++ if (hr_qp->sq_signal_bits) ++ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); ++ ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ ++wq->tail; ++} ++ ++static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx, ++ struct hns_roce_cq *cq) ++{ ++ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; ++ hns_roce_free_srq_wqe(srq, wqe_idx); ++} ++ ++static void cqe_proc_rq(struct hns_roce_wq *wq, struct hns_roce_cq *cq) ++{ ++ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ ++wq->tail; ++} ++ ++static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, ++ struct hns_roce_cq *cq) ++{ ++ struct hns_roce_v2_cqe *cqe = cq->cqe; ++ struct hns_roce_srq *srq = NULL; ++ uint32_t wqe_idx; ++ ++ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); ++ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { ++ cqe_proc_sq(qp, wqe_idx, cq); ++ } else { ++ if (get_srq_from_cqe(cqe, ctx, qp, &srq)) ++ return V2_CQ_POLL_ERR; ++ ++ if (srq) ++ cqe_proc_srq(srq, wqe_idx, cq); ++ else ++ cqe_proc_rq(&qp->rq, cq); ++ } ++ ++ return 0; ++} ++ ++static int parse_cqe_for_cq(struct hns_roce_context *ctx, struct hns_roce_cq *cq, ++ struct hns_roce_qp *cur_qp, struct ibv_wc *wc) ++{ ++ struct hns_roce_v2_cqe *cqe = cq->cqe; + struct hns_roce_srq *srq = NULL; +- struct hns_roce_v2_cqe *cqe; + uint8_t opcode; +- uint8_t status; ++ ++ if (!wc) { ++ if (cqe_proc_wq(ctx, cur_qp, cq)) ++ return V2_CQ_POLL_ERR; ++ ++ return 0; ++ } ++ ++ opcode = hr_reg_read(cqe, CQE_OPCODE); ++ ++ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { ++ parse_cqe_for_req(cqe, wc, cur_qp, opcode); ++ } else { ++ wc->byte_len = le32toh(cqe->byte_cnt); ++ get_opcode_for_resp(cqe, wc, opcode); ++ ++ if (get_srq_from_cqe(cqe, ctx, cur_qp, &srq)) ++ return V2_CQ_POLL_ERR; ++ ++ if (srq) ++ parse_cqe_for_srq(cqe, wc, srq); ++ else ++ parse_cqe_for_resp(cqe, wc, cur_qp, opcode); ++ } ++ ++ return 0; ++} ++ ++static int hns_roce_poll_one(struct hns_roce_context *ctx, ++ struct hns_roce_qp **cur_qp, struct hns_roce_cq *cq, ++ struct ibv_wc *wc) ++{ ++ struct hns_roce_v2_cqe *cqe; ++ uint8_t status, wc_status; + uint32_t qpn; +- bool is_send; + + cqe = next_cqe_sw_v2(cq); + if (!cqe) +- return V2_CQ_EMPTY; ++ return wc ? 
V2_CQ_EMPTY : ENOENT; + ++ cq->cqe = cqe; + ++cq->cons_index; + + udma_from_device_barrier(); +@@ -576,31 +657,20 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + return V2_CQ_POLL_ERR; + } + +- opcode = hr_reg_read(cqe, CQE_OPCODE); +- is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ; +- if (is_send) { +- parse_cqe_for_req(cqe, wc, *cur_qp, opcode); +- } else { +- wc->byte_len = le32toh(cqe->byte_cnt); +- get_opcode_for_resp(cqe, wc, opcode); ++ if (parse_cqe_for_cq(ctx, cq, *cur_qp, wc)) ++ return V2_CQ_POLL_ERR; + +- if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq)) +- return V2_CQ_POLL_ERR; ++ status = hr_reg_read(cqe, CQE_STATUS); ++ wc_status = get_wc_status(status); + +- if (srq) { +- parse_cqe_for_srq(cqe, wc, srq); +- } else { +- if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode)) +- return V2_CQ_POLL_ERR; +- } ++ if (wc) { ++ wc->status = wc_status; ++ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); ++ wc->qp_num = qpn; ++ } else { ++ cq->verbs_cq.cq_ex.status = wc_status; + } + +- wc->qp_num = qpn; +- +- status = hr_reg_read(cqe, CQE_STATUS); +- wc->status = get_wc_status(status); +- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); +- + if (status == HNS_ROCE_V2_CQE_SUCCESS) + return V2_CQ_OK; + +@@ -614,16 +684,16 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + struct ibv_wc *wc) + { +- int npolled; +- int err = V2_CQ_OK; +- struct hns_roce_qp *qp = NULL; +- struct hns_roce_cq *cq = to_hr_cq(ibvcq); + struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context); ++ struct hns_roce_cq *cq = to_hr_cq(ibvcq); ++ struct hns_roce_qp *qp = NULL; ++ int err = V2_CQ_OK; ++ int npolled; + + pthread_spin_lock(&cq->lock); + + for (npolled = 0; npolled < ne; ++npolled) { +- err = hns_roce_v2_poll_one(cq, &qp, wc + npolled); ++ err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); + if (err != V2_CQ_OK) + break; + } +@@ -1651,97 +1721,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + return ret; + } + +-static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, +- struct hns_roce_cq *cq) +-{ +- struct hns_roce_wq *wq = &hr_qp->sq; +- +- if (hr_qp->sq_signal_bits) +- wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); +- +- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; +- ++wq->tail; +-} +- +-static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx, +- struct hns_roce_cq *cq) +-{ +- cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; +- hns_roce_free_srq_wqe(srq, wqe_idx); +-} +- +-static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq) +-{ +- struct hns_roce_wq *wq = &hr_qp->rq; +- +- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; +- ++wq->tail; +-} +- +-static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp, +- struct hns_roce_cq *cq) +-{ +- struct hns_roce_v2_cqe *cqe = cq->cqe; +- struct hns_roce_srq *srq = NULL; +- uint32_t wqe_idx; +- +- wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); +- if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) { +- cqe_proc_sq(qp, wqe_idx, cq); +- } else { +- if (get_srq_from_cqe(cqe, ctx, qp, &srq)) +- return V2_CQ_POLL_ERR; +- +- if (srq) +- cqe_proc_srq(srq, wqe_idx, cq); +- else +- cqe_proc_rq(qp, cq); +- } +- return 0; +-} +- +-static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq) +-{ +- struct hns_roce_qp *qp = NULL; +- struct hns_roce_v2_cqe *cqe; +- uint8_t status; +- uint32_t qpn; +- +- cqe = next_cqe_sw_v2(cq); +- 
if (!cqe) +- return ENOENT; +- +- ++cq->cons_index; +- udma_from_device_barrier(); +- +- cq->cqe = cqe; +- qpn = hr_reg_read(cqe, CQE_LCL_QPN); +- +- qp = hns_roce_v2_find_qp(ctx, qpn); +- if (!qp) +- return V2_CQ_POLL_ERR; +- +- if (cqe_proc_wq(ctx, qp, cq)) +- return V2_CQ_POLL_ERR; +- +- status = hr_reg_read(cqe, CQE_STATUS); +- cq->verbs_cq.cq_ex.status = get_wc_status(status); +- +- if (status == HNS_ROCE_V2_CQE_SUCCESS) +- return V2_CQ_OK; +- +- /* +- * once a cqe in error status, the driver needs to help the HW to +- * generated flushed cqes for all subsequent wqes +- */ +- return hns_roce_flush_cqe(qp, status); +-} +- + static int wc_start_poll_cq(struct ibv_cq_ex *current, + struct ibv_poll_cq_attr *attr) + { + struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); + struct hns_roce_context *ctx = to_hr_ctx(current->context); ++ struct hns_roce_qp *qp = NULL; + int err; + + if (attr->comp_mask) +@@ -1749,7 +1734,7 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current, + + pthread_spin_lock(&cq->lock); + +- err = wc_poll_cqe(ctx, cq); ++ err = hns_roce_poll_one(ctx, &qp, cq, NULL); + if (err != V2_CQ_OK) + pthread_spin_unlock(&cq->lock); + +@@ -1760,9 +1745,10 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current) + { + struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); + struct hns_roce_context *ctx = to_hr_ctx(current->context); ++ struct hns_roce_qp *qp = NULL; + int err; + +- err = wc_poll_cqe(ctx, cq); ++ err = hns_roce_poll_one(ctx, &qp, cq, NULL); + if (err != V2_CQ_OK) + return err; + +-- +2.30.0 + diff --git a/0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch b/0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch new file mode 100644 index 0000000..c6a7721 --- /dev/null +++ b/0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch @@ -0,0 +1,1007 @@ +From 36446a56eea5db54e229207bf39c796df16f519a Mon Sep 17 00:00:00 2001 +From: Xinhao Liu +Date: Mon, 21 Mar 2022 09:32:04 +0800 +Subject: libhns: Extended QP supports the new post send mechanism + +The ofed provides a new set of post send APIs for extended QP. With the new +APIs, users can post send WR more efficiently. The hns driver provides +support for the new APIs. 
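+
+A minimal sketch of how an application drives this path, assuming an
+already opened device context ctx and an RC QP created through
+ibv_create_qp_ex(); the rkey, remote_addr, lkey, buf and len values are
+illustrative only, not part of the driver:
+
+    struct ibv_qp_init_attr_ex attr_ex = {
+        /* pd, send_cq/recv_cq and cap are filled in as usual */
+        .qp_type = IBV_QPT_RC,
+        .comp_mask = IBV_QP_INIT_ATTR_PD |
+                     IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
+        .send_ops_flags = IBV_QP_EX_WITH_SEND |
+                          IBV_QP_EX_WITH_RDMA_WRITE,
+    };
+    struct ibv_qp *qp = ibv_create_qp_ex(ctx, &attr_ex);
+    struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);
+    int ret;
+
+    ibv_wr_start(qpx);
+
+    qpx->wr_id = 1;
+    qpx->wr_flags = IBV_SEND_SIGNALED;
+    ibv_wr_rdma_write(qpx, rkey, remote_addr);
+    ibv_wr_set_sge(qpx, lkey, (uintptr_t)buf, len);
+
+    ret = ibv_wr_complete(qpx);    /* or ibv_wr_abort(qpx) to drop */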
+ +Signed-off-by: Xinhao Liu +Signed-off-by: Yixing Liu +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u.h | 6 + + providers/hns/hns_roce_u_hw_v2.c | 814 +++++++++++++++++++++++++++++-- + providers/hns/hns_roce_u_hw_v2.h | 7 + + providers/hns/hns_roce_u_verbs.c | 11 +- + 4 files changed, 792 insertions(+), 46 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 171fe06e..96059172 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -338,6 +338,12 @@ struct hns_roce_qp { + unsigned long flags; + int refcnt; /* specially used for XRC */ + void *dwqe_page; ++ ++ /* specific fields for the new post send APIs */ ++ int err; ++ void *cur_wqe; ++ unsigned int rb_sq_head; /* roll back sq head */ ++ struct hns_roce_sge_info sge_info; + }; + + struct hns_roce_av { +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index fab1939b..0169250d 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -78,7 +78,7 @@ static inline unsigned int mtu_enum_to_int(enum ibv_mtu mtu) + static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n); + + static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, +- struct ibv_sge *sg) ++ const struct ibv_sge *sg) + { + dseg->lkey = htole32(sg->lkey); + dseg->addr = htole64(sg->addr); +@@ -824,9 +824,28 @@ static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg, + sge_info->total_len = len; + } + ++static void get_src_buf_info(void **src_addr, uint32_t *src_len, ++ const void *buf_list, int buf_idx, ++ enum hns_roce_wr_buf_type type) ++{ ++ if (type == WR_BUF_TYPE_POST_SEND) { ++ const struct ibv_sge *sg_list = buf_list; ++ ++ *src_addr = (void *)(uintptr_t)sg_list[buf_idx].addr; ++ *src_len = sg_list[buf_idx].length; ++ } else { ++ const struct ibv_data_buf *bf_list = buf_list; ++ ++ *src_addr = bf_list[buf_idx].addr; ++ *src_len = bf_list[buf_idx].length; ++ } ++} ++ + static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, +- const struct ibv_send_wr *wr, +- struct hns_roce_sge_info *sge_info) ++ struct hns_roce_sge_info *sge_info, ++ const void *buf_list, ++ uint32_t num_buf, ++ enum hns_roce_wr_buf_type buf_type) + { + unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg); + unsigned int sge_mask = qp->ex_sge.sge_cnt - 1; +@@ -834,18 +853,15 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + uint32_t src_len, tail_len; + int i; + +- + if (sge_info->total_len > qp->sq.max_gs * sge_sz) + return EINVAL; + + dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask); + tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask); + +- for (i = 0; i < wr->num_sge; i++) { ++ for (i = 0; i < num_buf; i++) { + tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr; +- +- src_addr = (void *)(uintptr_t)wr->sg_list[i].addr; +- src_len = wr->sg_list[i].length; ++ get_src_buf_info(&src_addr, &src_len, buf_list, i, buf_type); + + if (src_len < tail_len) { + memcpy(dst_addr, src_addr, src_len); +@@ -870,20 +886,11 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + return 0; + } + +-static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, +- struct hns_roce_ud_sq_wqe *ud_sq_wqe) ++static void set_ud_inl_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, ++ uint8_t *data) + { +- uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {0}; + uint32_t *loc = (uint32_t *)data; + uint32_t tmp_data; +- void *tmp = data; +- int i; +- +- for (i = 0; i < wr->num_sge; i++) { +- memcpy(tmp, 
(void *)(uintptr_t)wr->sg_list[i].addr, +- wr->sg_list[i].length); +- tmp += wr->sg_list[i].length; +- } + + hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_15_0, *loc & 0xffff); + hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_23_16, (*loc >> 16) & 0xff); +@@ -896,6 +903,22 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, + hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_63_48, *loc >> 16); + } + ++static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, ++ struct hns_roce_ud_sq_wqe *ud_sq_wqe) ++{ ++ uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {}; ++ void *tmp = data; ++ int i; ++ ++ for (i = 0; i < wr->num_sge; i++) { ++ memcpy(tmp, (void *)(uintptr_t)wr->sg_list[i].addr, ++ wr->sg_list[i].length); ++ tmp += wr->sg_list[i].length; ++ } ++ ++ set_ud_inl_seg(ud_sq_wqe, data); ++} ++ + static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) + { + int mtu = mtu_enum_to_int(qp->path_mtu); +@@ -919,7 +942,9 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + } else { + hr_reg_enable(ud_sq_wqe, UDWQE_INLINE_TYPE); + +- ret = fill_ext_sge_inl_data(qp, wr, sge_info); ++ ret = fill_ext_sge_inl_data(qp, sge_info, ++ wr->sg_list, wr->num_sge, ++ WR_BUF_TYPE_POST_SEND); + if (ret) + return ret; + +@@ -995,6 +1020,23 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, + return ret; + } + ++static inline void enable_wqe(struct hns_roce_qp *qp, void *sq_wqe, ++ unsigned int index) ++{ ++ struct hns_roce_rc_sq_wqe *wqe = sq_wqe; ++ ++ /* ++ * The pipeline can sequentially post all valid WQEs in wq buf, ++ * including those new WQEs waiting for doorbell to update the PI again. ++ * Therefore, the valid bit of WQE MUST be updated after all of fields ++ * and extSGEs have been written into DDR instead of cache. ++ */ ++ if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) ++ udma_to_device_barrier(); ++ ++ hr_reg_write_bool(wqe, RCWQE_OWNER, !(index & BIT(qp->sq.shift))); ++} ++ + static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + unsigned int nreq, struct hns_roce_sge_info *sge_info) + { +@@ -1026,17 +1068,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + if (ret) + return ret; + +- /* +- * The pipeline can sequentially post all valid WQEs in wq buf, +- * including those new WQEs waiting for doorbell to update the PI again. +- * Therefore, the valid bit of WQE MUST be updated after all of fields +- * and extSGEs have been written into DDR instead of cache. +- */ +- if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) +- udma_to_device_barrier(); +- +- hr_reg_write_bool(wqe, RCWQE_OWNER, +- !((qp->sq.head + nreq) & BIT(qp->sq.shift))); ++ enable_wqe(qp, ud_sq_wqe, qp->sq.head + nreq); + + return ret; + } +@@ -1068,7 +1100,9 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + } else { + hr_reg_enable(rc_sq_wqe, RCWQE_INLINE_TYPE); + +- ret = fill_ext_sge_inl_data(qp, wr, sge_info); ++ ret = fill_ext_sge_inl_data(qp, sge_info, ++ wr->sg_list, wr->num_sge, ++ WR_BUF_TYPE_POST_SEND); + if (ret) + return ret; + +@@ -1189,17 +1223,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + return ret; + + wqe_valid: +- /* +- * The pipeline can sequentially post all valid WQEs into WQ buffer, +- * including new WQEs waiting for the doorbell to update the PI again. +- * Therefore, the owner bit of WQE MUST be updated after all fields +- * and extSGEs have been written into DDR instead of cache. 
+- */ +- if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) +- udma_to_device_barrier(); +- +- hr_reg_write_bool(wqe, RCWQE_OWNER, +- !((qp->sq.head + nreq) & BIT(qp->sq.shift))); ++ enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq); + + return 0; + } +@@ -1921,6 +1945,710 @@ void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags) + cq_ex->read_cvlan = wc_read_cvlan; + } + ++static struct hns_roce_rc_sq_wqe * ++init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode) ++{ ++ unsigned int send_flags = qp->verbs_qp.qp_ex.wr_flags; ++ struct hns_roce_rc_sq_wqe *wqe; ++ unsigned int wqe_idx; ++ ++ if (hns_roce_v2_wq_overflow(&qp->sq, 0, ++ to_hr_cq(qp->verbs_qp.qp.send_cq))) { ++ qp->cur_wqe = NULL; ++ qp->err = ENOMEM; ++ return NULL; ++ } ++ ++ wqe_idx = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ wqe = get_send_wqe(qp, wqe_idx); ++ ++ hr_reg_write(wqe, RCWQE_OPCODE, opcode); ++ hr_reg_write_bool(wqe, RCWQE_CQE, send_flags & IBV_SEND_SIGNALED); ++ hr_reg_write_bool(wqe, RCWQE_FENCE, send_flags & IBV_SEND_FENCE); ++ hr_reg_write_bool(wqe, RCWQE_SE, send_flags & IBV_SEND_SOLICITED); ++ hr_reg_clear(wqe, RCWQE_INLINE); ++ hr_reg_clear(wqe, RCWQE_SO); ++ ++ qp->sq.wrid[wqe_idx] = wr_id; ++ qp->cur_wqe = wqe; ++ qp->sq.head++; ++ ++ return wqe; ++} ++ ++static void wr_set_sge_rc(struct ibv_qp_ex *ibv_qp, uint32_t lkey, ++ uint64_t addr, uint32_t length) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; ++ ++ if (!wqe) ++ return; ++ ++ hr_reg_write(wqe, RCWQE_LKEY0, lkey); ++ hr_reg_write(wqe, RCWQE_VA0_L, addr); ++ hr_reg_write(wqe, RCWQE_VA0_H, addr >> 32); ++ ++ wqe->msg_len = htole32(length); ++ hr_reg_write(wqe, RCWQE_LEN0, length); ++ hr_reg_write(wqe, RCWQE_SGE_NUM, !!length); ++ /* ignore ex sge start index */ ++ ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg, ++ struct hns_roce_qp *qp, const struct ibv_sge *sge, ++ size_t num_sge) ++{ ++ unsigned int index = qp->sge_info.start_idx; ++ unsigned int mask = qp->ex_sge.sge_cnt - 1; ++ unsigned int msg_len = 0; ++ unsigned int cnt = 0; ++ int i; ++ ++ for (i = 0; i < num_sge; i++) { ++ if (!sge[i].length) ++ continue; ++ ++ msg_len += sge[i].length; ++ cnt++; ++ ++ if (cnt <= HNS_ROCE_SGE_IN_WQE) { ++ set_data_seg_v2(dseg, &sge[i]); ++ dseg++; ++ } else { ++ dseg = get_send_sge_ex(qp, index & mask); ++ set_data_seg_v2(dseg, &sge[i]); ++ index++; ++ } ++ } ++ ++ qp->sge_info.start_idx = index; ++ qp->sge_info.valid_num = cnt; ++ qp->sge_info.total_len = msg_len; ++} ++ ++static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge, ++ const struct ibv_sge *sg_list) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; ++ struct hns_roce_v2_wqe_data_seg *dseg; ++ ++ if (!wqe) ++ return; ++ ++ if (num_sge > qp->sq.max_gs) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX, ++ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1)); ++ ++ dseg = (void *)(wqe + 1); ++ set_sgl_rc(dseg, qp, sg_list, num_sge); ++ ++ wqe->msg_len = htole32(qp->sge_info.total_len); ++ hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); ++ ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void wr_send_rc(struct ibv_qp_ex *ibv_qp) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ ++ init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND); ++} ++ ++static void wr_send_imm_rc(struct ibv_qp_ex *ibv_qp, __be32 imm_data) ++{ ++ 
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe; ++ ++ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_IMM); ++ if (!wqe) ++ return; ++ ++ wqe->immtdata = htole32(be32toh(imm_data)); ++} ++ ++static void wr_send_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe; ++ ++ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_INV); ++ if (!wqe) ++ return; ++ ++ wqe->inv_key = htole32(invalidate_rkey); ++} ++ ++static void wr_local_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe; ++ ++ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_LOCAL_INV); ++ if (!wqe) ++ return; ++ ++ hr_reg_enable(wqe, RCWQE_SO); ++ wqe->inv_key = htole32(invalidate_rkey); ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void wr_set_xrc_srqn(struct ibv_qp_ex *ibv_qp, uint32_t remote_srqn) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; ++ ++ if (!wqe) ++ return; ++ ++ hr_reg_write(wqe, RCWQE_XRC_SRQN, remote_srqn); ++} ++ ++static void wr_rdma_read(struct ibv_qp_ex *ibv_qp, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe; ++ ++ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_RDMA_READ); ++ if (!wqe) ++ return; ++ ++ wqe->va = htole64(remote_addr); ++ wqe->rkey = htole32(rkey); ++} ++ ++static void wr_rdma_write(struct ibv_qp_ex *ibv_qp, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe; ++ ++ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_RDMA_WRITE); ++ if (!wqe) ++ return; ++ ++ wqe->va = htole64(remote_addr); ++ wqe->rkey = htole32(rkey); ++} ++ ++static void wr_rdma_write_imm(struct ibv_qp_ex *ibv_qp, uint32_t rkey, ++ uint64_t remote_addr, __be32 imm_data) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe; ++ ++ wqe = init_rc_wqe(qp, ibv_qp->wr_id, ++ HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM); ++ if (!wqe) ++ return; ++ ++ wqe->va = htole64(remote_addr); ++ wqe->rkey = htole32(rkey); ++ wqe->immtdata = htole32(be32toh(imm_data)); ++} ++ ++static void set_wr_atomic(struct ibv_qp_ex *ibv_qp, uint32_t rkey, ++ uint64_t remote_addr, uint64_t compare_add, ++ uint64_t swap, uint32_t opcode) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_v2_wqe_data_seg *dseg; ++ struct hns_roce_wqe_atomic_seg *aseg; ++ struct hns_roce_rc_sq_wqe *wqe; ++ ++ wqe = init_rc_wqe(qp, ibv_qp->wr_id, opcode); ++ if (!wqe) ++ return; ++ ++ wqe->va = htole64(remote_addr); ++ wqe->rkey = htole32(rkey); ++ ++ dseg = (void *)(wqe + 1); ++ aseg = (void *)(dseg + 1); ++ ++ if (opcode == HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP) { ++ aseg->fetchadd_swap_data = htole64(swap); ++ aseg->cmp_data = htole64(compare_add); ++ } else { ++ aseg->fetchadd_swap_data = htole64(compare_add); ++ aseg->cmp_data = 0; ++ } ++} ++ ++static void wr_atomic_cmp_swp(struct ibv_qp_ex *ibv_qp, uint32_t rkey, ++ uint64_t remote_addr, uint64_t compare, ++ uint64_t swap) ++{ ++ set_wr_atomic(ibv_qp, rkey, remote_addr, compare, swap, ++ HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP); ++} ++ ++static void wr_atomic_fetch_add(struct ibv_qp_ex *ibv_qp, uint32_t rkey, ++ uint64_t remote_addr, uint64_t add) ++{ ++ 
set_wr_atomic(ibv_qp, rkey, remote_addr, add, 0, ++ HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD); ++} ++ ++static void set_inline_data_list_rc(struct hns_roce_qp *qp, ++ struct hns_roce_rc_sq_wqe *wqe, ++ size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ unsigned int msg_len = qp->sge_info.total_len; ++ void *dseg; ++ int ret; ++ int i; ++ ++ hr_reg_enable(wqe, RCWQE_INLINE); ++ ++ wqe->msg_len = htole32(msg_len); ++ if (msg_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) { ++ hr_reg_clear(wqe, RCWQE_INLINE_TYPE); ++ /* ignore ex sge start index */ ++ ++ dseg = wqe + 1; ++ for (i = 0; i < num_buf; i++) { ++ memcpy(dseg, buf_list[i].addr, buf_list[i].length); ++ dseg += buf_list[i].length; ++ } ++ /* ignore sge num */ ++ } else { ++ if (!check_inl_data_len(qp, msg_len)) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ hr_reg_enable(wqe, RCWQE_INLINE_TYPE); ++ hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX, ++ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1)); ++ ++ ret = fill_ext_sge_inl_data(qp, &qp->sge_info, ++ buf_list, num_buf, ++ WR_BUF_TYPE_SEND_WR_OPS); ++ if (ret) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num); ++ } ++} ++ ++static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr, ++ size_t length) ++{ ++ struct ibv_data_buf buff = { .addr = addr, .length = length }; ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; ++ ++ if (!wqe) ++ return; ++ ++ buff.addr = addr; ++ buff.length = length; ++ ++ qp->sge_info.total_len = length; ++ set_inline_data_list_rc(qp, wqe, 1, &buff); ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe; ++ int i; ++ ++ if (!wqe) ++ return; ++ ++ qp->sge_info.total_len = 0; ++ for (i = 0; i < num_buf; i++) ++ qp->sge_info.total_len += buf_list[i].length; ++ ++ set_inline_data_list_rc(qp, wqe, num_buf, buf_list); ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static struct hns_roce_ud_sq_wqe * ++init_ud_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode) ++{ ++ unsigned int send_flags = qp->verbs_qp.qp_ex.wr_flags; ++ struct hns_roce_ud_sq_wqe *wqe; ++ unsigned int wqe_idx; ++ ++ if (hns_roce_v2_wq_overflow(&qp->sq, 0, ++ to_hr_cq(qp->verbs_qp.qp.send_cq))) { ++ qp->cur_wqe = NULL; ++ qp->err = ENOMEM; ++ return NULL; ++ } ++ ++ wqe_idx = qp->sq.head & (qp->sq.wqe_cnt - 1); ++ wqe = get_send_wqe(qp, wqe_idx); ++ ++ hr_reg_write(wqe, UDWQE_OPCODE, opcode); ++ hr_reg_write_bool(wqe, UDWQE_CQE, send_flags & IBV_SEND_SIGNALED); ++ hr_reg_write_bool(wqe, UDWQE_SE, send_flags & IBV_SEND_SOLICITED); ++ hr_reg_clear(wqe, UDWQE_INLINE); ++ ++ qp->sq.wrid[wqe_idx] = wr_id; ++ qp->cur_wqe = wqe; ++ qp->sq.head++; ++ ++ return wqe; ++} ++ ++static void wr_send_ud(struct ibv_qp_ex *ibv_qp) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ ++ init_ud_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND); ++} ++ ++static void wr_send_imm_ud(struct ibv_qp_ex *ibv_qp, __be32 imm_data) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_ud_sq_wqe *wqe; ++ ++ wqe = init_ud_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_IMM); ++ if (!wqe) ++ return; ++ ++ wqe->immtdata = htole32(be32toh(imm_data)); ++} ++ ++static void wr_set_ud_addr(struct ibv_qp_ex *ibv_qp, struct ibv_ah *ah, ++ uint32_t remote_qpn, 
uint32_t remote_qkey) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; ++ struct hns_roce_ah *hr_ah = to_hr_ah(ah); ++ int ret; ++ ++ if (!wqe) ++ return; ++ ++ wqe->qkey = htole32(remote_qkey & 0x80000000 ? qp->qkey : remote_qkey); ++ ++ hr_reg_write(wqe, UDWQE_DQPN, remote_qpn); ++ ++ ret = fill_ud_av(wqe, hr_ah); ++ if (ret) ++ qp->err = ret; ++ ++ qp->sl = hr_ah->av.sl; ++} ++ ++static void wr_set_sge_ud(struct ibv_qp_ex *ibv_qp, uint32_t lkey, ++ uint64_t addr, uint32_t length) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; ++ struct hns_roce_v2_wqe_data_seg *dseg; ++ int sge_idx; ++ ++ if (!wqe) ++ return; ++ ++ wqe->msg_len = htole32(length); ++ hr_reg_write(wqe, UDWQE_SGE_NUM, 1); ++ sge_idx = qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1); ++ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx); ++ ++ dseg = get_send_sge_ex(qp, sge_idx); ++ ++ dseg->lkey = htole32(lkey); ++ dseg->addr = htole64(addr); ++ dseg->len = htole32(length); ++ ++ qp->sge_info.start_idx++; ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge, ++ const struct ibv_sge *sg_list) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ unsigned int sge_idx = qp->sge_info.start_idx; ++ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; ++ unsigned int mask = qp->ex_sge.sge_cnt - 1; ++ struct hns_roce_v2_wqe_data_seg *dseg; ++ unsigned int msg_len = 0; ++ unsigned int cnt = 0; ++ ++ if (!wqe) ++ return; ++ ++ if (num_sge > qp->sq.max_gs) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask); ++ for (int i = 0; i < num_sge; i++) { ++ if (!sg_list[i].length) ++ continue; ++ ++ dseg = get_send_sge_ex(qp, sge_idx & mask); ++ set_data_seg_v2(dseg, &sg_list[i]); ++ ++ msg_len += sg_list[i].length; ++ cnt++; ++ sge_idx++; ++ } ++ ++ wqe->msg_len = htole32(msg_len); ++ hr_reg_write(wqe, UDWQE_SGE_NUM, cnt); ++ ++ qp->sge_info.start_idx += cnt; ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void set_inline_data_list_ud(struct hns_roce_qp *qp, ++ struct hns_roce_ud_sq_wqe *wqe, ++ size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {}; ++ unsigned int msg_len = qp->sge_info.total_len; ++ void *tmp; ++ int ret; ++ int i; ++ ++ if (!check_inl_data_len(qp, msg_len)) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ hr_reg_enable(wqe, UDWQE_INLINE); ++ ++ wqe->msg_len = htole32(msg_len); ++ if (msg_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) { ++ hr_reg_clear(wqe, UDWQE_INLINE_TYPE); ++ /* ignore ex sge start index */ ++ ++ tmp = data; ++ for (i = 0; i < num_buf; i++) { ++ memcpy(tmp, buf_list[i].addr, buf_list[i].length); ++ tmp += buf_list[i].length; ++ } ++ ++ set_ud_inl_seg(wqe, data); ++ /* ignore sge num */ ++ } else { ++ hr_reg_enable(wqe, UDWQE_INLINE_TYPE); ++ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, ++ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1)); ++ ++ ret = fill_ext_sge_inl_data(qp, &qp->sge_info, ++ buf_list, num_buf, ++ WR_BUF_TYPE_SEND_WR_OPS); ++ if (ret) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ hr_reg_write(wqe, UDWQE_SGE_NUM, qp->sge_info.valid_num); ++ } ++} ++ ++static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr, ++ size_t length) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; ++ struct ibv_data_buf buff; ++ ++ if (!wqe) ++ return; 
++ ++ buff.addr = addr; ++ buff.length = length; ++ ++ qp->sge_info.total_len = length; ++ set_inline_data_list_ud(qp, wqe, 1, &buff); ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf, ++ const struct ibv_data_buf *buf_list) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe; ++ int i; ++ ++ if (!wqe) ++ return; ++ ++ qp->sge_info.total_len = 0; ++ for (i = 0; i < num_buf; i++) ++ qp->sge_info.total_len += buf_list[i].length; ++ ++ set_inline_data_list_ud(qp, wqe, num_buf, buf_list); ++ enable_wqe(qp, wqe, qp->sq.head); ++} ++ ++static void wr_start(struct ibv_qp_ex *ibv_qp) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ enum ibv_qp_state state = ibv_qp->qp_base.state; ++ ++ if (state == IBV_QPS_RESET || ++ state == IBV_QPS_INIT || ++ state == IBV_QPS_RTR) { ++ qp->err = EINVAL; ++ return; ++ } ++ ++ pthread_spin_lock(&qp->sq.lock); ++ qp->sge_info.start_idx = qp->next_sge; ++ qp->rb_sq_head = qp->sq.head; ++ qp->err = 0; ++} ++ ++static int wr_complete(struct ibv_qp_ex *ibv_qp) ++{ ++ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context); ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ unsigned int nreq = qp->sq.head - qp->rb_sq_head; ++ struct ibv_qp_attr attr; ++ int err = qp->err; ++ ++ if (err) { ++ qp->sq.head = qp->rb_sq_head; ++ goto out; ++ } ++ ++ if (nreq) { ++ qp->next_sge = qp->sge_info.start_idx; ++ udma_to_device_barrier(); ++ ++ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) ++ hns_roce_write_dwqe(qp, qp->cur_wqe); ++ else ++ hns_roce_update_sq_db(ctx, qp); ++ ++ if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) ++ *(qp->sdb) = qp->sq.head & 0xffff; ++ } ++ ++out: ++ pthread_spin_unlock(&qp->sq.lock); ++ if (ibv_qp->qp_base.state == IBV_QPS_ERR) { ++ attr.qp_state = IBV_QPS_ERR; ++ hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE); ++ } ++ ++ return err; ++} ++ ++static void wr_abort(struct ibv_qp_ex *ibv_qp) ++{ ++ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); ++ ++ qp->sq.head = qp->rb_sq_head; ++ ++ pthread_spin_unlock(&qp->sq.lock); ++} ++ ++enum { ++ HNS_SUPPORTED_SEND_OPS_FLAGS_RC_XRC = ++ IBV_QP_EX_WITH_SEND | ++ IBV_QP_EX_WITH_SEND_WITH_INV | ++ IBV_QP_EX_WITH_SEND_WITH_IMM | ++ IBV_QP_EX_WITH_RDMA_WRITE | ++ IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | ++ IBV_QP_EX_WITH_RDMA_READ | ++ IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP | ++ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | ++ IBV_QP_EX_WITH_LOCAL_INV, ++ HNS_SUPPORTED_SEND_OPS_FLAGS_UD = ++ IBV_QP_EX_WITH_SEND | ++ IBV_QP_EX_WITH_SEND_WITH_IMM, ++}; ++ ++static void fill_send_wr_ops_rc_xrc(struct ibv_qp_ex *qp_ex) ++{ ++ qp_ex->wr_send = wr_send_rc; ++ qp_ex->wr_send_imm = wr_send_imm_rc; ++ qp_ex->wr_send_inv = wr_send_inv_rc; ++ qp_ex->wr_rdma_read = wr_rdma_read; ++ qp_ex->wr_rdma_write = wr_rdma_write; ++ qp_ex->wr_rdma_write_imm = wr_rdma_write_imm; ++ qp_ex->wr_set_inline_data = wr_set_inline_data_rc; ++ qp_ex->wr_set_inline_data_list = wr_set_inline_data_list_rc; ++ qp_ex->wr_local_inv = wr_local_inv_rc; ++ qp_ex->wr_atomic_cmp_swp = wr_atomic_cmp_swp; ++ qp_ex->wr_atomic_fetch_add = wr_atomic_fetch_add; ++ qp_ex->wr_set_sge = wr_set_sge_rc; ++ qp_ex->wr_set_sge_list = wr_set_sge_list_rc; ++} ++ ++static void fill_send_wr_ops_ud(struct ibv_qp_ex *qp_ex) ++{ ++ qp_ex->wr_send = wr_send_ud; ++ qp_ex->wr_send_imm = wr_send_imm_ud; ++ qp_ex->wr_set_ud_addr = wr_set_ud_addr; ++ qp_ex->wr_set_inline_data = wr_set_inline_data_ud; ++ 
qp_ex->wr_set_inline_data_list = wr_set_inline_data_list_ud; ++ qp_ex->wr_set_sge = wr_set_sge_ud; ++ qp_ex->wr_set_sge_list = wr_set_sge_list_ud; ++} ++ ++static int fill_send_wr_ops(const struct ibv_qp_init_attr_ex *attr, ++ struct ibv_qp_ex *qp_ex) ++{ ++ uint64_t ops = attr->send_ops_flags; ++ ++ qp_ex->wr_start = wr_start; ++ qp_ex->wr_complete = wr_complete; ++ qp_ex->wr_abort = wr_abort; ++ ++ switch (attr->qp_type) { ++ case IBV_QPT_XRC_SEND: ++ qp_ex->wr_set_xrc_srqn = wr_set_xrc_srqn; ++ SWITCH_FALLTHROUGH; ++ case IBV_QPT_RC: ++ if (ops & ~HNS_SUPPORTED_SEND_OPS_FLAGS_RC_XRC) ++ return -EOPNOTSUPP; ++ fill_send_wr_ops_rc_xrc(qp_ex); ++ break; ++ case IBV_QPT_UD: ++ if (ops & ~HNS_SUPPORTED_SEND_OPS_FLAGS_UD) ++ return -EOPNOTSUPP; ++ fill_send_wr_ops_ud(qp_ex); ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ ++int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp) ++{ ++ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { ++ if (fill_send_wr_ops(attr, &qp->verbs_qp.qp_ex)) ++ return -EOPNOTSUPP; ++ ++ qp->verbs_qp.comp_mask |= VERBS_QP_EX; ++ } ++ ++ return 0; ++} ++ + const struct hns_roce_u_hw hns_roce_u_hw_v2 = { + .hw_version = HNS_ROCE_HW_VER2, + .hw_ops = { +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 122fdbdf..098dbdf4 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -122,6 +122,11 @@ enum { + HNS_ROCE_V2_CQ_DB_NTR, + }; + ++enum hns_roce_wr_buf_type { ++ WR_BUF_TYPE_POST_SEND, ++ WR_BUF_TYPE_SEND_WR_OPS, ++}; ++ + struct hns_roce_db { + __le32 byte_4; + __le32 parameter; +@@ -339,5 +344,7 @@ struct hns_roce_ud_sq_wqe { + + void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); + void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags); ++int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr, ++ struct hns_roce_qp *qp); + + #endif /* _HNS_ROCE_U_HW_V2_H */ +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 9ea8a6d3..1457a1a2 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -768,7 +768,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) + } + + enum { +- CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD, ++ CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD | ++ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS, + }; + + static int check_qp_create_mask(struct hns_roce_context *ctx, +@@ -1270,9 +1271,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + if (ret) + goto err_cmd; + ++ ret = hns_roce_attach_qp_ex_ops(attr, qp); ++ if (ret) ++ goto err_ops; ++ + ret = hns_roce_store_qp(context, qp); + if (ret) +- goto err_store; ++ goto err_ops; + + if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) { + ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key); +@@ -1286,7 +1291,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + + err_dwqe: + hns_roce_v2_clear_qp(context, qp); +-err_store: ++err_ops: + ibv_cmd_destroy_qp(&qp->verbs_qp.qp); + err_cmd: + hns_roce_free_qp_buf(qp, context); +-- +2.30.0 + diff --git a/0045-libhns-Add-general-error-type-for-CQE.patch b/0045-libhns-Add-general-error-type-for-CQE.patch new file mode 100644 index 0000000..b41b68e --- /dev/null +++ b/0045-libhns-Add-general-error-type-for-CQE.patch @@ -0,0 +1,58 @@ +From 1e5f8bb89169453cfdd17bf58cef7186dcf58596 Mon Sep 17 00:00:00 2001 +From: Youming Luo +Date: Wed, 16 Mar 2022 17:36:39 
+0800 +Subject: libhns: Add general error type for CQE + +If a Work Request posted to the RQ of a UD QP is not big enough to hold the +incoming message, then the hns ROCEE will generate a general error CQE. The +IB specification does not specify this type of CQE. + +In the case of unreliable communication, it is not desirable to change the +QP to an error state due to an insufficient receive length error. So if the +hns ROCEE reports a general error CQE, there is no need to set the QP to an +error state, and the driver should skip it. + +Signed-off-by: Youming Luo +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 4 +++- + providers/hns/hns_roce_u_hw_v2.h | 1 + + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 42a77151..fab1939b 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -166,6 +166,7 @@ static enum ibv_wc_status get_wc_status(uint8_t status) + { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR }, + { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, + { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, ++ { HNS_ROCE_V2_CQE_GENERAL_ERR, IBV_WC_GENERAL_ERR }, + { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, + }; + +@@ -671,7 +672,8 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx, + cq->verbs_cq.cq_ex.status = wc_status; + } + +- if (status == HNS_ROCE_V2_CQE_SUCCESS) ++ if (status == HNS_ROCE_V2_CQE_SUCCESS || ++ status == HNS_ROCE_V2_CQE_GENERAL_ERR) + return V2_CQ_OK; + + /* +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 0068f4fe..122fdbdf 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -110,6 +110,7 @@ enum { + HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR = 0x15, + HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR = 0x16, + HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR = 0x22, ++ HNS_ROCE_V2_CQE_GENERAL_ERR = 0x23, + HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR = 0x24, + }; + +-- +2.30.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 328e86d..5b23b25 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 35.1 -Release: 5 +Release: 6 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -46,6 +46,12 @@ Patch36: 0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch Patch37: 0037-libhns-Fix-wrong-HIP08-version-macro.patch Patch38: 0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch Patch39: 0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch +Patch40: 0040-libhns-Add-support-for-creating-extended-CQ.patch +Patch41: 0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch +Patch42: 0042-libhns-Optimize-the-error-handling-of-CQE.patch +Patch43: 0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch +Patch44: 0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch +Patch45: 0045-libhns-Add-general-error-type-for-CQE.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -290,6 +296,12 @@ fi %{_mandir}/* %changelog +* Wed Aug 24 2022 luozhengfeng - 35.1-6 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add support for hns CQ_EX + * Fri Aug 12 2022 luozhengfeng - 35.1-5 - Type: bugfix - ID: NA
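Illustrative note on patch 0045: with the status mapping above, an oversized UD receive surfaces as IBV_WC_GENERAL_ERR on the extended CQ while the QP stays usable. A minimal consumer-side sketch, assuming the CQ was created with ibv_create_cq_ex(); drain_cq() is a hypothetical helper, not part of any patch here:

	#include <stdio.h>
	#include <infiniband/verbs.h>

	/* Hypothetical helper: drain an extended CQ, tolerating general errors. */
	static void drain_cq(struct ibv_cq_ex *cq_ex)
	{
		struct ibv_poll_cq_attr attr = {};
		int ret;

		if (ibv_start_poll(cq_ex, &attr))
			return;	/* no completions available */

		do {
			if (cq_ex->status == IBV_WC_GENERAL_ERR) {
				/* e.g. a UD receive buffer that was too small;
				 * the QP is not moved to error, so just drop
				 * this completion.
				 */
				fprintf(stderr, "WR %llu dropped\n",
					(unsigned long long)cq_ex->wr_id);
			}
			ret = ibv_next_poll(cq_ex);
		} while (!ret);

		ibv_end_poll(cq_ex);
	}

Successful entries would be consumed in the same loop as usual, e.g. via the ibv_wc_read_opcode() and ibv_wc_read_byte_len() helpers.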
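Likewise for patch 0044: the wr_* callbacks installed by fill_send_wr_ops() back the generic ibv_wr_*() interface from <infiniband/verbs.h>. A rough caller-side sketch, assuming the QP was created through ibv_create_qp_ex() with IBV_QP_INIT_ATTR_SEND_OPS_FLAGS and IBV_QP_EX_WITH_RDMA_WRITE set; post_one_rdma_write() is a hypothetical helper, not part of the patches:

	#include <stdint.h>
	#include <infiniband/verbs.h>

	/* Hypothetical helper: post a single signaled RDMA WRITE via the wr API. */
	static int post_one_rdma_write(struct ibv_qp *qp, uint32_t lkey, void *buf,
				       uint32_t len, uint32_t rkey, uint64_t raddr)
	{
		struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);

		ibv_wr_start(qpx);			/* provider wr_start() */
		qpx->wr_id = 1;
		qpx->wr_flags = IBV_SEND_SIGNALED;
		ibv_wr_rdma_write(qpx, rkey, raddr);	/* wr_rdma_write() */
		ibv_wr_set_sge(qpx, lkey, (uintptr_t)buf, len);	/* wr_set_sge_rc() on RC */

		return ibv_wr_complete(qpx);		/* wr_complete() rings the SQ doorbell */
	}

If any step fails, the batch can instead be dropped with ibv_wr_abort(), which maps to wr_abort() above.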