diff --git a/0025-libhns-Refactor-the-poll-one-interface.patch b/0025-libhns-Refactor-the-poll-one-interface.patch new file mode 100644 index 0000000..dbcdf37 --- /dev/null +++ b/0025-libhns-Refactor-the-poll-one-interface.patch @@ -0,0 +1,545 @@ +From 0851ae661c4fe4dd285c22c6acce462fc8004b8d Mon Sep 17 00:00:00 2001 +From: Yixian Liu +Date: Thu, 18 Nov 2021 22:46:10 +0800 +Subject: libhns: Refactor the poll one interface + +Mainly about: + +1. Separate the differences between various objects (such as sq, rq, srq) + into functions. +2. Optimize function names, variable names, and comments to increase code + readability. +3. Use map instead of switch branch to simplify the code. + +Signed-off-by: Yixian Liu +Signed-off-by: Yangyang Li +Signed-off-by: Xinhao Liu +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 373 +++++++++++++++---------------- + providers/hns/hns_roce_u_hw_v2.h | 10 +- + 2 files changed, 189 insertions(+), 194 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 5fb6477..1b4e91b 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -154,59 +154,37 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr, + return 0; + } + +-static void hns_roce_v2_handle_error_cqe(struct hns_roce_v2_cqe *cqe, +- struct ibv_wc *wc) +-{ +- unsigned int status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, +- CQE_BYTE_4_STATUS_S); +- unsigned int cqe_status = status & HNS_ROCE_V2_CQE_STATUS_MASK; ++static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++ uint8_t status) ++{ ++ static const struct { ++ unsigned int cqe_status; ++ enum ibv_wc_status wc_status; ++ } map[] = { ++ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR }, ++ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, ++ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR }, ++ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR }, ++ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR }, ++ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR }, ++ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR }, ++ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR }, ++ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR }, ++ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR }, ++ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR }, ++ }; + +- switch (cqe_status) { +- case HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR: +- wc->status = IBV_WC_LOC_LEN_ERR; +- break; +- case HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR: +- wc->status = IBV_WC_LOC_QP_OP_ERR; +- break; +- case HNS_ROCE_V2_CQE_LOCAL_PROT_ERR: +- wc->status = IBV_WC_LOC_PROT_ERR; +- break; +- case HNS_ROCE_V2_CQE_WR_FLUSH_ERR: +- wc->status = IBV_WC_WR_FLUSH_ERR; +- break; +- case HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR: +- wc->status = IBV_WC_MW_BIND_ERR; +- break; +- case HNS_ROCE_V2_CQE_BAD_RESP_ERR: +- wc->status = IBV_WC_BAD_RESP_ERR; +- break; +- case HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR: +- wc->status = IBV_WC_LOC_ACCESS_ERR; +- break; +- case HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR: +- wc->status = IBV_WC_REM_INV_REQ_ERR; +- break; +- case HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR: +- wc->status = IBV_WC_REM_ACCESS_ERR; +- break; +- case HNS_ROCE_V2_CQE_REMOTE_OP_ERR: +- wc->status = 
IBV_WC_REM_OP_ERR; +- break; +- case HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR: +- wc->status = IBV_WC_RETRY_EXC_ERR; +- break; +- case HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR: +- wc->status = IBV_WC_RNR_RETRY_EXC_ERR; +- break; +- case HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR: +- wc->status = IBV_WC_REM_ABORT_ERR; +- break; +- case HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR: +- wc->status = IBV_WC_REM_INV_RD_REQ_ERR; +- break; +- default: +- wc->status = IBV_WC_GENERAL_ERR; +- break; ++ int i; ++ ++ wc->status = IBV_WC_GENERAL_ERR; ++ for (i = 0; i < ARRAY_SIZE(map); i++) { ++ if (status == map[i].cqe_status) { ++ wc->status = map[i].wc_status; ++ break; ++ } + } + } + +@@ -268,6 +246,27 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind) + pthread_spin_unlock(&srq->lock); + } + ++static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe, ++ struct hns_roce_context *ctx, ++ struct hns_roce_qp *hr_qp, ++ struct hns_roce_srq **srq) ++{ ++ uint32_t srqn; ++ ++ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) { ++ srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M, ++ CQE_BYTE_12_XRC_SRQN_S); ++ ++ *srq = hns_roce_find_srq(ctx, srqn); ++ if (!*srq) ++ return -EINVAL; ++ } else if (hr_qp->verbs_qp.qp.srq) { ++ *srq = to_hr_srq(hr_qp->verbs_qp.qp.srq); ++ } ++ ++ return 0; ++} ++ + static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq, + struct hns_roce_cq *cq) + { +@@ -332,7 +331,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) + hns_roce_write512(qp->sq.db_reg, wqe); + } + +-static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx, ++static void update_cq_db(struct hns_roce_context *ctx, + struct hns_roce_cq *cq) + { + struct hns_roce_db cq_db = {}; +@@ -378,19 +377,17 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp) + static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask); + +-static int hns_roce_flush_cqe(struct hns_roce_qp **cur_qp, struct ibv_wc *wc) ++static int hns_roce_flush_cqe(struct hns_roce_qp *hr_qp, uint8_t status) + { + struct ibv_qp_attr attr; + int attr_mask; + +- if ((wc->status != IBV_WC_SUCCESS) && +- (wc->status != IBV_WC_WR_FLUSH_ERR)) { ++ if (status != HNS_ROCE_V2_CQE_WR_FLUSH_ERR) { + attr_mask = IBV_QP_STATE; + attr.qp_state = IBV_QPS_ERR; +- hns_roce_u_v2_modify_qp(&(*cur_qp)->verbs_qp.qp, &attr, +- attr_mask); ++ hns_roce_u_v2_modify_qp(&hr_qp->verbs_qp.qp, &attr, attr_mask); + +- (*cur_qp)->verbs_qp.qp.state = IBV_QPS_ERR; ++ hr_qp->verbs_qp.qp.state = IBV_QPS_ERR; + } + + return V2_CQ_OK; +@@ -409,41 +406,6 @@ static const unsigned int wc_send_op_map[] = { + [HNS_ROCE_SQ_OP_BIND_MW] = IBV_WC_BIND_MW, + }; + +-static void hns_roce_v2_get_opcode_from_sender(struct hns_roce_v2_cqe *cqe, +- struct ibv_wc *wc) +-{ +- uint32_t opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, +- CQE_BYTE_4_OPCODE_S); +- +- switch (opcode) { +- case HNS_ROCE_SQ_OP_SEND: +- case HNS_ROCE_SQ_OP_SEND_WITH_INV: +- case HNS_ROCE_SQ_OP_RDMA_WRITE: +- case HNS_ROCE_SQ_OP_BIND_MW: +- wc->wc_flags = 0; +- break; +- case HNS_ROCE_SQ_OP_SEND_WITH_IMM: +- case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM: +- wc->wc_flags = IBV_WC_WITH_IMM; +- break; +- case HNS_ROCE_SQ_OP_LOCAL_INV: +- wc->wc_flags = IBV_WC_WITH_INV; +- break; +- case HNS_ROCE_SQ_OP_RDMA_READ: +- case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP: +- case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD: +- wc->wc_flags = 0; +- wc->byte_len = le32toh(cqe->byte_cnt); +- break; +- default: +- wc->status = 
IBV_WC_GENERAL_ERR; +- wc->wc_flags = 0; +- return; +- } +- +- wc->opcode = wc_send_op_map[opcode]; +-} +- + static const unsigned int wc_rcv_op_map[] = { + [HNS_ROCE_RECV_OP_RDMA_WRITE_IMM] = IBV_WC_RECV_RDMA_WITH_IMM, + [HNS_ROCE_RECV_OP_SEND] = IBV_WC_RECV, +@@ -451,9 +413,8 @@ static const unsigned int wc_rcv_op_map[] = { + [HNS_ROCE_RECV_OP_SEND_WITH_INV] = IBV_WC_RECV, + }; + +-static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe, +- struct ibv_wc *wc, +- uint32_t opcode) ++static void get_opcode_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++ uint32_t opcode) + { + switch (opcode) { + case HNS_ROCE_RECV_OP_SEND: +@@ -476,9 +437,8 @@ static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe, + wc->opcode = wc_rcv_op_map[opcode]; + } + +-static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, +- struct hns_roce_qp **cur_qp, +- struct ibv_wc *wc, uint32_t opcode) ++static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++ struct hns_roce_qp **cur_qp, uint32_t opcode) + { + if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) && + (opcode == HNS_ROCE_RECV_OP_SEND || +@@ -521,26 +481,117 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, + return V2_CQ_OK; + } + ++static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc) ++{ ++ wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M, ++ CQE_BYTE_32_SL_S); ++ wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M, ++ CQE_BYTE_32_RMT_QPN_S); ++ wc->slid = 0; ++ wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ? ++ IBV_WC_GRH : 0; ++ wc->pkey_index = 0; ++} ++ ++static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++ struct hns_roce_srq *srq) ++{ ++ uint32_t wqe_idx; ++ ++ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, ++ CQE_BYTE_4_WQE_IDX_S); ++ wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; ++ hns_roce_free_srq_wqe(srq, wqe_idx); ++} ++ ++static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++ struct hns_roce_qp *hr_qp, uint8_t opcode) ++{ ++ struct hns_roce_wq *wq; ++ int ret; ++ ++ wq = &hr_qp->rq; ++ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ ++wq->tail; ++ ++ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD) ++ parse_for_ud_qp(cqe, wc); ++ ++ ret = handle_recv_inl_wqe(cqe, wc, &hr_qp, opcode); ++ if (ret) { ++ verbs_err(verbs_get_ctx(hr_qp->verbs_qp.qp.context), ++ PFX "failed to handle recv inline wqe!\n"); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, ++ struct hns_roce_qp *hr_qp, uint8_t opcode) ++{ ++ struct hns_roce_wq *wq; ++ uint32_t wqe_idx; ++ ++ wq = &hr_qp->sq; ++ /* ++ * in case of signalling, the tail pointer needs to be updated ++ * according to the wqe idx in the current cqe first ++ */ ++ if (hr_qp->sq_signal_bits) { ++ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, ++ CQE_BYTE_4_WQE_IDX_S); ++ /* get the processed wqes num since last signalling */ ++ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); ++ } ++ /* write the wr_id of wq into the wc */ ++ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++ ++wq->tail; ++ ++ switch (opcode) { ++ case HNS_ROCE_SQ_OP_SEND: ++ case HNS_ROCE_SQ_OP_SEND_WITH_INV: ++ case HNS_ROCE_SQ_OP_RDMA_WRITE: ++ case HNS_ROCE_SQ_OP_BIND_MW: ++ wc->wc_flags = 0; ++ break; ++ case HNS_ROCE_SQ_OP_SEND_WITH_IMM: ++ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM: ++ wc->wc_flags = 
IBV_WC_WITH_IMM; ++ break; ++ case HNS_ROCE_SQ_OP_LOCAL_INV: ++ wc->wc_flags = IBV_WC_WITH_INV; ++ break; ++ case HNS_ROCE_SQ_OP_RDMA_READ: ++ case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP: ++ case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD: ++ wc->wc_flags = 0; ++ wc->byte_len = le32toh(cqe->byte_cnt); ++ break; ++ default: ++ wc->status = IBV_WC_GENERAL_ERR; ++ wc->wc_flags = 0; ++ return; ++ } ++ ++ wc->opcode = wc_send_op_map[opcode]; ++} ++ + static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + struct hns_roce_qp **cur_qp, struct ibv_wc *wc) + { + struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context); + struct hns_roce_srq *srq = NULL; +- struct hns_roce_wq *wq = NULL; + struct hns_roce_v2_cqe *cqe; +- uint16_t wqe_ctr; +- uint32_t opcode; +- uint32_t srqn; ++ uint8_t opcode; ++ uint8_t status; + uint32_t qpn; +- int is_send; +- int ret; ++ bool is_send; + +- /* According to CI, find the relative cqe */ + cqe = next_cqe_sw_v2(cq); + if (!cqe) + return V2_CQ_EMPTY; + +- /* Get the next cqe, CI will be added gradually */ + ++cq->cons_index; + + udma_from_device_barrier(); +@@ -548,102 +599,48 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M, + CQE_BYTE_16_LCL_QPN_S); + +- is_send = (roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == +- HNS_ROCE_V2_CQE_IS_SQ); +- +- /* if qp is zero, it will not get the correct qpn */ ++ /* if cur qp is null, then could not get the correct qpn */ + if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) { + *cur_qp = hns_roce_v2_find_qp(ctx, qpn); + if (!*cur_qp) + return V2_CQ_POLL_ERR; + } +- wc->qp_num = qpn; + +- if ((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) { +- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M, +- CQE_BYTE_12_XRC_SRQN_S); ++ status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, ++ CQE_BYTE_4_STATUS_S); ++ opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, ++ CQE_BYTE_4_OPCODE_S); ++ is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ; ++ if (is_send) { ++ parse_cqe_for_req(cqe, wc, *cur_qp, opcode); ++ } else { ++ wc->byte_len = le32toh(cqe->byte_cnt); ++ get_opcode_for_resp(cqe, wc, opcode); + +- srq = hns_roce_find_srq(ctx, srqn); +- if (!srq) ++ if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq)) + return V2_CQ_POLL_ERR; +- } else if ((*cur_qp)->verbs_qp.qp.srq) { +- srq = to_hr_srq((*cur_qp)->verbs_qp.qp.srq); +- } + +- if (is_send) { +- wq = &(*cur_qp)->sq; +- /* +- * if sq_signal_bits is 1, the tail pointer first update to +- * the wqe corresponding the current cqe +- */ +- if ((*cur_qp)->sq_signal_bits) { +- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4, +- CQE_BYTE_4_WQE_IDX_M, +- CQE_BYTE_4_WQE_IDX_S)); +- /* +- * wq->tail will plus a positive number every time, +- * when wq->tail exceeds 32b, it is 0 and acc +- */ +- wq->tail += (wqe_ctr - (uint16_t) wq->tail) & +- (wq->wqe_cnt - 1); ++ if (srq) { ++ parse_cqe_for_srq(cqe, wc, srq); ++ } else { ++ if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode)) ++ return V2_CQ_POLL_ERR; + } +- /* write the wr_id of wq into the wc */ +- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; +- ++wq->tail; +- } else if (srq) { +- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4, +- CQE_BYTE_4_WQE_IDX_M, +- CQE_BYTE_4_WQE_IDX_S)); +- wc->wr_id = srq->wrid[wqe_ctr & (srq->wqe_cnt - 1)]; +- hns_roce_free_srq_wqe(srq, wqe_ctr); +- } else { +- wq = &(*cur_qp)->rq; +- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; +- ++wq->tail; + } + ++ wc->qp_num = qpn; ++ + /* +- * HW maintains wc 
status, set the err type and directly return, after +- * generated the incorrect CQE ++ * once a cqe in error status, the driver needs to help the HW to ++ * generated flushed cqes for all subsequent wqes + */ +- if (roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, +- CQE_BYTE_4_STATUS_S) != HNS_ROCE_V2_CQE_SUCCESS) { +- hns_roce_v2_handle_error_cqe(cqe, wc); +- return hns_roce_flush_cqe(cur_qp, wc); ++ if (status != HNS_ROCE_V2_CQE_SUCCESS) { ++ handle_error_cqe(cqe, wc, status); ++ return hns_roce_flush_cqe(*cur_qp, status); + } + + wc->status = IBV_WC_SUCCESS; + +- /* +- * According to the opcode type of cqe, mark the opcode and other +- * information of wc +- */ +- if (is_send) { +- hns_roce_v2_get_opcode_from_sender(cqe, wc); +- } else { +- /* Get opcode and flag in rq&srq */ +- wc->byte_len = le32toh(cqe->byte_cnt); +- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, +- CQE_BYTE_4_OPCODE_S) & HNS_ROCE_V2_CQE_OPCODE_MASK; +- hns_roce_v2_get_opcode_from_receiver(cqe, wc, opcode); +- +- ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc, opcode); +- if (ret) { +- verbs_err(verbs_get_ctx(cq->ibv_cq.context), +- PFX "failed to handle recv inline wqe!\n"); +- return ret; +- } +- +- wc->sl = (uint8_t)roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M, +- CQE_BYTE_32_SL_S); +- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M, +- CQE_BYTE_32_RMT_QPN_S); +- wc->slid = 0; +- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ? +- IBV_WC_GRH : 0; +- wc->pkey_index = 0; +- } +- + return V2_CQ_OK; + } + +@@ -668,7 +665,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) + *cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M; + else +- hns_roce_v2_update_cq_cons_index(ctx, cq); ++ update_cq_db(ctx, cq); + } + + pthread_spin_unlock(&cq->lock); +@@ -1438,7 +1435,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + if (nfreed) { + cq->cons_index += nfreed; + udma_to_device_barrier(); +- hns_roce_v2_update_cq_cons_index(ctx, cq); ++ update_cq_db(ctx, cq); + } + } + +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index af72cd7..51a1df4 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -33,7 +33,10 @@ + #ifndef _HNS_ROCE_U_HW_V2_H + #define _HNS_ROCE_U_HW_V2_H + +-#define HNS_ROCE_V2_CQE_IS_SQ 0 ++enum { ++ CQE_FOR_SQ, ++ CQE_FOR_RQ, ++}; + + #define HNS_ROCE_V2_CQ_DB_REQ_SOL 1 + #define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0 +@@ -94,11 +97,6 @@ enum { + V2_CQ_POLL_ERR = -2, + }; + +-enum { +- HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, +- HNS_ROCE_V2_CQE_OPCODE_MASK = 0x1f, +-}; +- + enum { + HNS_ROCE_V2_CQE_SUCCESS = 0x00, + HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR = 0x01, +-- +2.27.0 + diff --git a/0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch b/0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch new file mode 100644 index 0000000..efe74c8 --- /dev/null +++ b/0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch @@ -0,0 +1,42 @@ +From 72f495e542c1c458e71fd6971f412edec41830e1 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Wed, 24 Nov 2021 19:03:54 +0800 +Subject: libhns: hr ilog32() should be represented by a function instead of a + macro + +The compiler will check whether the modifiers of the function are of the +correct type. 
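+
+For illustration, the two forms side by side (the first is the old
+definition this patch removes, the second is the replacement added
+below):
+
+	/* old: the explicit cast accepts and silently truncates any scalar */
+	#define hr_ilog32(n) ilog32((unsigned int)(n) - 1)
+
+	/* new: arguments are type-checked against the prototype */
+	static inline unsigned int hr_ilog32(unsigned int count)
+	{
+		return ilog32(count - 1);
+	}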
+ +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u.h | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 92dc26c..c1ae1c9 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -101,8 +101,6 @@ + #define roce_set_bit(origin, shift, val) \ + roce_set_field((origin), (1ul << (shift)), (shift), (val)) + +-#define hr_ilog32(n) ilog32((unsigned int)(n) - 1) +- + enum { + HNS_ROCE_QP_TABLE_BITS = 8, + HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS, +@@ -326,6 +324,11 @@ static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift) + return hr_hw_page_align(count << buf_shift); + } + ++static inline unsigned int hr_ilog32(unsigned int count) ++{ ++ return ilog32(count - 1); ++} ++ + static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev) + { + return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device); +-- +2.27.0 + diff --git a/0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch b/0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch new file mode 100644 index 0000000..f1faf9e --- /dev/null +++ b/0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch @@ -0,0 +1,30 @@ +From 61911051eec0f984537c2762208b8ecbc875d5d3 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Fri, 26 Nov 2021 16:53:18 +0800 +Subject: libhns: Fix the size setting error when copying CQE in clean cq() + +The size of CQE is different for different versions of hardware, so the +driver needs to specify the size of CQE explicitly. + +Fixes: 3546e6b69ac8 ("libhns: Add support for CQE in size of 64 Bytes") +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 1b4e91b..b13b6dc 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1426,7 +1426,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + (prod_index + nfreed) & cq->ibv_cq.cqe); + owner_bit = roce_get_bit(dest->byte_4, + CQE_BYTE_4_OWNER_S); +- memcpy(dest, cqe, sizeof(*cqe)); ++ memcpy(dest, cqe, cq->cqe_size); + roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S, + owner_bit); + } +-- +2.27.0 + diff --git a/0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch b/0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch new file mode 100644 index 0000000..d6ff8c0 --- /dev/null +++ b/0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch @@ -0,0 +1,53 @@ +From 8fbb85bae3fd2632da80e77d02bbbe73aac85f88 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Fri, 26 Nov 2021 17:55:32 +0800 +Subject: libhns: Fix the problem that XRC does not need to create RQ + +XRC QP does not require RQ, so RQ should not be created. 
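+
+For illustration, the rule this fix encodes (a sketch mirroring the new
+hns_roce_qp_has_rq() helper added below):
+
+	/* a QP owns an RQ only if it is not an XRC QP and has no SRQ */
+	has_rq = attr->qp_type != IBV_QPT_XRC_SEND &&
+		 attr->qp_type != IBV_QPT_XRC_RECV && !attr->srq;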
+
+Fixes: 4ed874a5cf30 ("libhns: Add support for XRC for HIP09")
+Signed-off-by: Wenpeng Liang
+---
+ providers/hns/hns_roce_u_verbs.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
+index 923c005..557d075 100644
+--- a/providers/hns/hns_roce_u_verbs.c
++++ b/providers/hns/hns_roce_u_verbs.c
+@@ -774,12 +774,22 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
+ 	return 0;
+ }
+ 
++static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr)
++{
++	if (attr->qp_type == IBV_QPT_XRC_SEND ||
++	    attr->qp_type == IBV_QPT_XRC_RECV || attr->srq)
++		return 0;
++
++	return 1;
++}
++
+ static int verify_qp_create_cap(struct hns_roce_context *ctx,
+ 				struct ibv_qp_init_attr_ex *attr)
+ {
+ 	struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+ 	struct ibv_qp_cap *cap = &attr->cap;
+ 	uint32_t min_wqe_num;
++	int has_rq;
+ 
+ 	if (!cap->max_send_wr && attr->qp_type != IBV_QPT_XRC_RECV)
+ 		return -EINVAL;
+@@ -790,7 +800,8 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
+ 	    cap->max_recv_sge > ctx->max_sge)
+ 		return -EINVAL;
+ 
+-	if (attr->srq) {
++	has_rq = hns_roce_qp_has_rq(attr);
++	if (!has_rq) {
+ 		cap->max_recv_wr = 0;
+ 		cap->max_recv_sge = 0;
+ 	}
+-- 
+2.27.0
+
diff --git a/0029-libhns-Add-vendor_err-information-for-error-WC.patch b/0029-libhns-Add-vendor_err-information-for-error-WC.patch
new file mode 100644
index 0000000..559315c
--- /dev/null
+++ b/0029-libhns-Add-vendor_err-information-for-error-WC.patch
@@ -0,0 +1,45 @@
+From 29fd05367349c7909949206a13092031b689eca7 Mon Sep 17 00:00:00 2001
+From: Lang Cheng
+Date: Tue, 30 Nov 2021 20:46:14 +0800
+Subject: libhns: Add vendor_err information for error WC
+
+ULP can get more error information of CQ through verbs.
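+
+For illustration, a minimal sketch of how a ULP might read the new field
+after polling (hypothetical error handling, not part of this patch):
+
+	struct ibv_wc wc;
+
+	if (ibv_poll_cq(cq, 1, &wc) > 0 && wc.status != IBV_WC_SUCCESS)
+		fprintf(stderr, "wc status %d, vendor_err 0x%x\n",
+			wc.status, wc.vendor_err);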
+ +Signed-off-by: Lang Cheng +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 3 +++ + providers/hns/hns_roce_u_hw_v2.h | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index b13b6dc..18399e9 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -186,6 +186,9 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + break; + } + } ++ ++ wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M, ++ CQE_BYTE_16_SUB_STATUS_S); + } + + static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 51a1df4..014cb8c 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -184,6 +184,9 @@ struct hns_roce_v2_cqe { + #define CQE_BYTE_16_LCL_QPN_S 0 + #define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S) + ++#define CQE_BYTE_16_SUB_STATUS_S 24 ++#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S) ++ + #define CQE_BYTE_28_SMAC_S 0 + #define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S) + +-- +2.27.0 + diff --git a/0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch b/0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch new file mode 100644 index 0000000..1d045ab --- /dev/null +++ b/0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch @@ -0,0 +1,69 @@ +From 46548879b84e8c502198a549d82ec079ebc8b9a0 Mon Sep 17 00:00:00 2001 +From: Lang Cheng +Date: Thu, 2 Dec 2021 21:44:26 +0800 +Subject: libhns: Forcibly rewrite the inline flag of WQE + +When a non-inline WR reuses a WQE that was used for inline the last time, +the remaining inline flag should be cleared. 
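+
+For illustration, the reuse hazard (a sketch; set_inline_bit() is a
+hypothetical helper standing in for the roce_set_bit() calls below):
+
+	/* old: the bit was only ever set, on the inline path ... */
+	if (wr->send_flags & IBV_SEND_INLINE)
+		set_inline_bit(wqe, 1);	/* ... so it stays set on reuse */
+
+	/* new: the bit is rewritten from send_flags for every WR */
+	set_inline_bit(wqe, !!(wr->send_flags & IBV_SEND_INLINE));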
+
+Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE")
+Fixes: 82fc508a6625 ("libhns: Add support for UD inline")
+Signed-off-by: Lang Cheng
+Signed-off-by: Wenpeng Liang
+---
+ providers/hns/hns_roce_u_hw_v2.c | 16 +++++++---------
+ 1 file changed, 7 insertions(+), 9 deletions(-)
+
+diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
+index 18399e9..4eaa929 100644
+--- a/providers/hns/hns_roce_u_hw_v2.c
++++ b/providers/hns/hns_roce_u_hw_v2.c
+@@ -876,8 +876,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
+ 	if (!check_inl_data_len(qp, sge_info->total_len))
+ 		return -EINVAL;
+ 
+-	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S, 1);
+-
+ 	if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) {
+ 		roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx,
+ 			     UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0);
+@@ -993,6 +991,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ 		     !!(wr->send_flags & IBV_SEND_SIGNALED));
+ 	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S,
+ 		     !!(wr->send_flags & IBV_SEND_SOLICITED));
++	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S,
++		     !!(wr->send_flags & IBV_SEND_INLINE));
+ 
+ 	ret = check_ud_opcode(ud_sq_wqe, wr);
+ 	if (ret)
+@@ -1044,8 +1044,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
+ 
+ 	dseg += sizeof(struct hns_roce_rc_sq_wqe);
+ 
+-	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, 1);
+-
+ 	if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) {
+ 		roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S,
+ 			     0);
+@@ -1150,13 +1148,13 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ 		return ret;
+ 
+ 	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
+-		     (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0);
+-
++		     !!(wr->send_flags & IBV_SEND_SIGNALED));
+ 	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
+-		     (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0);
+-
++		     !!(wr->send_flags & IBV_SEND_FENCE));
+ 	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S,
+-		     (wr->send_flags & IBV_SEND_SOLICITED) ? 1 : 0);
++		     !!(wr->send_flags & IBV_SEND_SOLICITED));
++	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S,
++		     !!(wr->send_flags & IBV_SEND_INLINE));
+ 
+ 	roce_set_field(rc_sq_wqe->byte_20,
+ 		       RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+-- 
+2.27.0
+
diff --git a/0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch b/0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch
new file mode 100644
index 0000000..a7d0d74
--- /dev/null
+++ b/0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch
@@ -0,0 +1,44 @@
+From 2194680136400d6a5f12298ff4993fa6f51c2e10 Mon Sep 17 00:00:00 2001
+From: Lang Cheng
+Date: Wed, 8 Dec 2021 19:03:56 +0800
+Subject: libhns: Forcibly rewrite the strong-order flag of WQE
+
+The local invalidate operation sets the SO flag, while all other
+operations clear it.
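+
+For illustration, the intended per-WR behaviour (a sketch of what the
+change below does):
+
+	/* clear SO unconditionally when building each WQE ... */
+	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0);
+
+	/* ... and set it again only for local invalidate */
+	if (wr->opcode == IBV_WR_LOCAL_INV)
+		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 1);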
+ +Fixes: a9ae7e9bfb5d ("libhns: Add local invalidate MR support for hip08") +Signed-off-by: Lang Cheng +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 4eaa929..cf871ab 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -1143,10 +1143,6 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + struct hns_roce_v2_wqe_data_seg *dseg; + int ret; + +- ret = check_rc_opcode(rc_sq_wqe, wr); +- if (ret) +- return ret; +- + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S, + !!(wr->send_flags & IBV_SEND_SIGNALED)); + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S, +@@ -1155,6 +1151,11 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + !!(wr->send_flags & IBV_SEND_SOLICITED)); + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, + !!(wr->send_flags & IBV_SEND_INLINE)); ++ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0); ++ ++ ret = check_rc_opcode(rc_sq_wqe, wr); ++ if (ret) ++ return ret; + + roce_set_field(rc_sq_wqe->byte_20, + RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, +-- +2.27.0 + diff --git a/0032-util-Fix-mmio-memcpy-on-ARM.patch b/0032-util-Fix-mmio-memcpy-on-ARM.patch new file mode 100644 index 0000000..8f49e0d --- /dev/null +++ b/0032-util-Fix-mmio-memcpy-on-ARM.patch @@ -0,0 +1,32 @@ +From 2a2e3ece2ff801e8d8e4915a56fe3fff8399d6a0 Mon Sep 17 00:00:00 2001 +From: Firas Jahjah +Date: Tue, 28 Dec 2021 15:58:37 +0200 +Subject: util: Fix mmio memcpy on ARM + +The below commit added a new implementation of mmio_memcpy_x64() for +ARM which was broken. The destination buffer must be advanced so we +don't copy to the same 64 bytes. + +Fixes: 159933c37 ("libhns: Add support for direct wqe") +Reviewed-by: Daniel Kranzdorf +Reviewed-by: Yossi Leybovich +Signed-off-by: Firas Jahjah +--- + util/mmio.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/util/mmio.h b/util/mmio.h +index 01d1455..5974058 100644 +--- a/util/mmio.h ++++ b/util/mmio.h +@@ -225,6 +225,7 @@ static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt) + _mmio_memcpy_x64_64b(dest, src); + bytecnt -= sizeof(uint64x2x4_t); + src += sizeof(uint64x2x4_t); ++ dest += sizeof(uint64x2x4_t); + } while (bytecnt > 0); + } + +-- +2.27.0 + diff --git a/0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch b/0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch new file mode 100644 index 0000000..d501e6c --- /dev/null +++ b/0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch @@ -0,0 +1,550 @@ +From 532c4b6babe97e3023a049f1c6bd8a8e3ad95140 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Sat, 25 Dec 2021 17:42:55 +0800 +Subject: libhns: Use new interfaces hr reg ***() to operate the WQE field + +Use hr_reg_xxx() to simply the codes for filling fields. 
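+
+For illustration, the kind of rewrite this patch performs (taken from
+the diff below):
+
+	/* before */
+	roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M,
+		       UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num);
+
+	/* after */
+	hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num);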
+ +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 170 ++++++++++------------------ + providers/hns/hns_roce_u_hw_v2.h | 184 ++++++++++++++----------------- + 2 files changed, 144 insertions(+), 210 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index cf871ab..0cff12b 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -323,13 +323,10 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe) + struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe; + + /* All kinds of DirectWQE have the same header field layout */ +- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1); +- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M, +- RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl); +- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M, +- RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT); +- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M, +- RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); ++ hr_reg_enable(rc_sq_wqe, RCWQE_FLAG); ++ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_L, qp->sl); ++ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT); ++ hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head); + + hns_roce_write512(qp->sq.db_reg, wqe); + } +@@ -834,29 +831,15 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr, + tmp += wr->sg_list[i].length; + } + +- roce_set_field(ud_sq_wqe->msg_len, +- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M, +- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S, +- *loc & 0xffff); +- +- roce_set_field(ud_sq_wqe->sge_num_pd, +- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M, +- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S, +- (*loc >> 16) & 0xff); ++ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_15_0, *loc & 0xffff); ++ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_23_16, (*loc >> 16) & 0xff); + + tmp_data = *loc >> 24; + loc++; + tmp_data |= ((*loc & 0xffff) << 8); + +- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx, +- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M, +- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S, +- tmp_data); +- +- roce_set_field(ud_sq_wqe->udpspn_rsv, +- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M, +- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S, +- *loc >> 16); ++ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_47_24, tmp_data); ++ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_63_48, *loc >> 16); + } + + static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) +@@ -877,13 +860,11 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + return -EINVAL; + + if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) { +- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx, +- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0); ++ hr_reg_clear(ud_sq_wqe, UDWQE_INLINE_TYPE); + + fill_ud_inn_inl_data(wr, ud_sq_wqe); + } else { +- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx, +- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 1); ++ hr_reg_enable(ud_sq_wqe, UDWQE_INLINE_TYPE); + + ret = fill_ext_sge_inl_data(qp, wr, sge_info); + if (ret) +@@ -891,8 +872,7 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + + sge_info->valid_num = sge_info->start_idx - sge_idx; + +- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M, +- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num); ++ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num); + } + + return 0; +@@ -919,8 +899,7 @@ static int check_ud_opcode(struct hns_roce_ud_sq_wqe *ud_sq_wqe, + + ud_sq_wqe->immtdata = get_immtdata(ib_op, wr); + +- roce_set_field(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OPCODE_M, +- UD_SQ_WQE_OPCODE_S, 
to_hr_opcode(ib_op)); ++ hr_reg_write(ud_sq_wqe, UDWQE_OPCODE, to_hr_opcode(ib_op)); + + return 0; + } +@@ -931,24 +910,12 @@ static int fill_ud_av(struct hns_roce_ud_sq_wqe *ud_sq_wqe, + if (unlikely(ah->av.sl > MAX_SERVICE_LEVEL)) + return EINVAL; + +- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_SL_M, +- UD_SQ_WQE_SL_S, ah->av.sl); +- +- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_PD_M, +- UD_SQ_WQE_PD_S, to_hr_pd(ah->ibv_ah.pd)->pdn); +- +- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_TCLASS_M, +- UD_SQ_WQE_TCLASS_S, ah->av.tclass); +- +- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_HOPLIMIT_M, +- UD_SQ_WQE_HOPLIMIT_S, ah->av.hop_limit); +- +- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_FLOW_LABEL_M, +- UD_SQ_WQE_FLOW_LABEL_S, ah->av.flowlabel); +- +- roce_set_field(ud_sq_wqe->udpspn_rsv, UD_SQ_WQE_UDP_SPN_M, +- UD_SQ_WQE_UDP_SPN_S, ah->av.udp_sport); +- ++ hr_reg_write(ud_sq_wqe, UDWQE_SL, ah->av.sl); ++ hr_reg_write(ud_sq_wqe, UDWQE_PD, to_hr_pd(ah->ibv_ah.pd)->pdn); ++ hr_reg_write(ud_sq_wqe, UDWQE_TCLASS, ah->av.tclass); ++ hr_reg_write(ud_sq_wqe, UDWQE_HOPLIMIT, ah->av.hop_limit); ++ hr_reg_write(ud_sq_wqe, UDWQE_FLOW_LABEL, ah->av.flowlabel); ++ hr_reg_write(ud_sq_wqe, UDWQE_UDPSPN, ah->av.udp_sport); + memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); + ud_sq_wqe->sgid_index = ah->av.gid_index; + memcpy(ud_sq_wqe->dgid, ah->av.dgid, HNS_ROCE_GID_SIZE); +@@ -962,17 +929,14 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe, + { + int ret = 0; + +- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx, +- UD_SQ_WQE_MSG_START_SGE_IDX_M, +- UD_SQ_WQE_MSG_START_SGE_IDX_S, +- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); ++ hr_reg_write(ud_sq_wqe, UDWQE_MSG_START_SGE_IDX, ++ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); + + set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info); + + ud_sq_wqe->msg_len = htole32(sge_info->total_len); + +- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M, +- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num); ++ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num); + + if (wr->send_flags & IBV_SEND_INLINE) + ret = set_ud_inl(qp, wr, ud_sq_wqe, sge_info); +@@ -987,12 +951,12 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe; + int ret = 0; + +- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_CQE_S, +- !!(wr->send_flags & IBV_SEND_SIGNALED)); +- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S, +- !!(wr->send_flags & IBV_SEND_SOLICITED)); +- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S, +- !!(wr->send_flags & IBV_SEND_INLINE)); ++ hr_reg_write_bool(ud_sq_wqe, UDWQE_CQE, ++ !!(wr->send_flags & IBV_SEND_SIGNALED)); ++ hr_reg_write_bool(ud_sq_wqe, UDWQE_SE, ++ !!(wr->send_flags & IBV_SEND_SOLICITED)); ++ hr_reg_write_bool(ud_sq_wqe, UDWQE_INLINE, ++ !!(wr->send_flags & IBV_SEND_INLINE)); + + ret = check_ud_opcode(ud_sq_wqe, wr); + if (ret) +@@ -1001,8 +965,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + ud_sq_wqe->qkey = htole32(wr->wr.ud.remote_qkey & 0x80000000 ? 
+ qp->qkey : wr->wr.ud.remote_qkey); + +- roce_set_field(ud_sq_wqe->rsv_dqpn, UD_SQ_WQE_DQPN_M, +- UD_SQ_WQE_DQPN_S, wr->wr.ud.remote_qpn); ++ hr_reg_write(ud_sq_wqe, UDWQE_DQPN, wr->wr.ud.remote_qpn); + + ret = fill_ud_av(ud_sq_wqe, ah); + if (ret) +@@ -1021,8 +984,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) + udma_to_device_barrier(); + +- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OWNER_S, +- ~((qp->sq.head + nreq) >> qp->sq.shift)); ++ hr_reg_write_bool(wqe, RCWQE_OWNER, ++ !((qp->sq.head + nreq) & BIT(qp->sq.shift))); + + return ret; + } +@@ -1045,8 +1008,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + dseg += sizeof(struct hns_roce_rc_sq_wqe); + + if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) { +- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S, +- 0); ++ hr_reg_clear(rc_sq_wqe, RCWQE_INLINE_TYPE); + + for (i = 0; i < wr->num_sge; i++) { + memcpy(dseg, (void *)(uintptr_t)(wr->sg_list[i].addr), +@@ -1054,8 +1016,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + dseg += wr->sg_list[i].length; + } + } else { +- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S, +- 1); ++ hr_reg_enable(rc_sq_wqe, RCWQE_INLINE_TYPE); + + ret = fill_ext_sge_inl_data(qp, wr, sge_info); + if (ret) +@@ -1063,9 +1024,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + + sge_info->valid_num = sge_info->start_idx - sge_idx; + +- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M, +- RC_SQ_WQE_BYTE_16_SGE_NUM_S, +- sge_info->valid_num); ++ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num); + } + + return 0; +@@ -1074,17 +1033,16 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe, + const struct ibv_send_wr *wr) + { +- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_MW_TYPE_S, +- wr->bind_mw.mw->type - 1); +- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_ATOMIC_S, +- (wr->bind_mw.bind_info.mw_access_flags & +- IBV_ACCESS_REMOTE_ATOMIC) ? 1 : 0); +- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_READ_S, +- (wr->bind_mw.bind_info.mw_access_flags & +- IBV_ACCESS_REMOTE_READ) ? 1 : 0); +- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_WRITE_S, +- (wr->bind_mw.bind_info.mw_access_flags & +- IBV_ACCESS_REMOTE_WRITE) ? 
1 : 0); ++ unsigned int access = wr->bind_mw.bind_info.mw_access_flags; ++ ++ hr_reg_write_bool(wqe, RCWQE_MW_TYPE, wr->bind_mw.mw->type - 1); ++ hr_reg_write_bool(wqe, RCWQE_MW_RA_EN, ++ !!(access & IBV_ACCESS_REMOTE_ATOMIC)); ++ hr_reg_write_bool(wqe, RCWQE_MW_RR_EN, ++ !!(access & IBV_ACCESS_REMOTE_READ)); ++ hr_reg_write_bool(wqe, RCWQE_MW_RW_EN, ++ !!(access & IBV_ACCESS_REMOTE_WRITE)); ++ + wqe->new_rkey = htole32(wr->bind_mw.rkey); + wqe->byte_16 = htole32(wr->bind_mw.bind_info.length & + HNS_ROCE_ADDRESS_MASK); +@@ -1117,7 +1075,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe, + wqe->va = htole64(wr->wr.atomic.remote_addr); + break; + case IBV_WR_LOCAL_INV: +- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 1); ++ hr_reg_enable(wqe, RCWQE_SO); + /* fallthrough */ + case IBV_WR_SEND_WITH_INV: + wqe->inv_key = htole32(wr->invalidate_rkey); +@@ -1130,8 +1088,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe, + break; + } + +- roce_set_field(wqe->byte_4, RC_SQ_WQE_BYTE_4_OPCODE_M, +- RC_SQ_WQE_BYTE_4_OPCODE_S, to_hr_opcode(wr->opcode)); ++ hr_reg_write(wqe, RCWQE_OPCODE, to_hr_opcode(wr->opcode)); + + return ret; + } +@@ -1143,24 +1100,22 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + struct hns_roce_v2_wqe_data_seg *dseg; + int ret; + +- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S, +- !!(wr->send_flags & IBV_SEND_SIGNALED)); +- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S, +- !!(wr->send_flags & IBV_SEND_FENCE)); +- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S, +- !!(wr->send_flags & IBV_SEND_SOLICITED)); +- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, +- !!(wr->send_flags & IBV_SEND_INLINE)); +- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0); ++ hr_reg_write_bool(wqe, RCWQE_CQE, ++ !!(wr->send_flags & IBV_SEND_SIGNALED)); ++ hr_reg_write_bool(wqe, RCWQE_FENCE, ++ !!(wr->send_flags & IBV_SEND_FENCE)); ++ hr_reg_write_bool(wqe, RCWQE_SE, ++ !!(wr->send_flags & IBV_SEND_SOLICITED)); ++ hr_reg_write_bool(wqe, RCWQE_INLINE, ++ !!(wr->send_flags & IBV_SEND_INLINE)); ++ hr_reg_clear(wqe, RCWQE_SO); + + ret = check_rc_opcode(rc_sq_wqe, wr); + if (ret) + return ret; + +- roce_set_field(rc_sq_wqe->byte_20, +- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, +- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S, +- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); ++ hr_reg_write(rc_sq_wqe, RCWQE_MSG_START_SGE_IDX, ++ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1)); + + if (wr->opcode == IBV_WR_BIND_MW) + goto wqe_valid; +@@ -1172,8 +1127,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + + rc_sq_wqe->msg_len = htole32(sge_info->total_len); + +- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M, +- RC_SQ_WQE_BYTE_16_SGE_NUM_S, sge_info->valid_num); ++ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num); + + if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD || + wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { +@@ -1196,8 +1150,8 @@ wqe_valid: + if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB) + udma_to_device_barrier(); + +- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S, +- ~((qp->sq.head + nreq) >> qp->sq.shift)); ++ hr_reg_write_bool(wqe, RCWQE_OWNER, ++ !((qp->sq.head + nreq) & BIT(qp->sq.shift))); + + return 0; + } +@@ -1243,10 +1197,8 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + + switch (ibvqp->qp_type) { + case IBV_QPT_XRC_SEND: +- roce_set_field(wqe->byte_16, +- RC_SQ_WQE_BYTE_16_XRC_SRQN_M, +- 
RC_SQ_WQE_BYTE_16_XRC_SRQN_S, +- wr->qp_type.xrc.remote_srqn); ++ hr_reg_write(wqe, RCWQE_XRC_SRQN, ++ wr->qp_type.xrc.remote_srqn); + SWITCH_FALLTHROUGH; + case IBV_QPT_RC: + ret = set_rc_wqe(wqe, qp, wr, nreq, &sge_info); +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 014cb8c..4330b7d 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -220,53 +220,44 @@ struct hns_roce_rc_sq_wqe { + __le64 va; + }; + +-#define RC_SQ_WQE_BYTE_4_OPCODE_S 0 +-#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) +- +-#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5 +-#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) +- +-#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13 +-#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) +- +-#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15 +-#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) +- +-#define RC_SQ_WQE_BYTE_4_OWNER_S 7 +- +-#define RC_SQ_WQE_BYTE_4_CQE_S 8 +- +-#define RC_SQ_WQE_BYTE_4_FENCE_S 9 +- +-#define RC_SQ_WQE_BYTE_4_SO_S 10 +- +-#define RC_SQ_WQE_BYTE_4_SE_S 11 +- +-#define RC_SQ_WQE_BYTE_4_INLINE_S 12 +- +-#define RC_SQ_WQE_BYTE_4_MW_TYPE_S 14 +- +-#define RC_SQ_WQE_BYTE_4_ATOMIC_S 20 +- +-#define RC_SQ_WQE_BYTE_4_RDMA_READ_S 21 +- +-#define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22 +- +-#define RC_SQ_WQE_BYTE_4_FLAG_S 31 +- +-#define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0 +-#define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \ +- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S) +- +-#define RC_SQ_WQE_BYTE_16_SGE_NUM_S 24 +-#define RC_SQ_WQE_BYTE_16_SGE_NUM_M \ +- (((1UL << 8) - 1) << RC_SQ_WQE_BYTE_16_SGE_NUM_S) +- +-#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 +-#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M \ +- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S) +- +-#define RC_SQ_WQE_BYTE_20_INL_TYPE_S 31 ++#define RCWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_rc_sq_wqe, h, l) ++ ++#define RCWQE_OPCODE RCWQE_FIELD_LOC(4, 0) ++#define RCWQE_DB_SL_L RCWQE_FIELD_LOC(6, 5) ++#define RCWQE_SQPN_L RCWQE_FIELD_LOC(6, 5) ++#define RCWQE_OWNER RCWQE_FIELD_LOC(7, 7) ++#define RCWQE_CQE RCWQE_FIELD_LOC(8, 8) ++#define RCWQE_FENCE RCWQE_FIELD_LOC(9, 9) ++#define RCWQE_SO RCWQE_FIELD_LOC(10, 10) ++#define RCWQE_SE RCWQE_FIELD_LOC(11, 11) ++#define RCWQE_INLINE RCWQE_FIELD_LOC(12, 12) ++#define RCWQE_DB_SL_H RCWQE_FIELD_LOC(14, 13) ++#define RCWQE_WQE_IDX RCWQE_FIELD_LOC(30, 15) ++#define RCWQE_SQPN_H RCWQE_FIELD_LOC(30, 13) ++#define RCWQE_FLAG RCWQE_FIELD_LOC(31, 31) ++#define RCWQE_MSG_LEN RCWQE_FIELD_LOC(63, 32) ++#define RCWQE_INV_KEY_IMMTDATA RCWQE_FIELD_LOC(95, 64) ++#define RCWQE_XRC_SRQN RCWQE_FIELD_LOC(119, 96) ++#define RCWQE_SGE_NUM RCWQE_FIELD_LOC(127, 120) ++#define RCWQE_MSG_START_SGE_IDX RCWQE_FIELD_LOC(151, 128) ++#define RCWQE_REDUCE_CODE RCWQE_FIELD_LOC(158, 152) ++#define RCWQE_INLINE_TYPE RCWQE_FIELD_LOC(159, 159) ++#define RCWQE_RKEY RCWQE_FIELD_LOC(191, 160) ++#define RCWQE_VA_L RCWQE_FIELD_LOC(223, 192) ++#define RCWQE_VA_H RCWQE_FIELD_LOC(255, 224) ++#define RCWQE_LEN0 RCWQE_FIELD_LOC(287, 256) ++#define RCWQE_LKEY0 RCWQE_FIELD_LOC(319, 288) ++#define RCWQE_VA0_L RCWQE_FIELD_LOC(351, 320) ++#define RCWQE_VA0_H RCWQE_FIELD_LOC(383, 352) ++#define RCWQE_LEN1 RCWQE_FIELD_LOC(415, 384) ++#define RCWQE_LKEY1 RCWQE_FIELD_LOC(447, 416) ++#define RCWQE_VA1_L RCWQE_FIELD_LOC(479, 448) ++#define RCWQE_VA1_H RCWQE_FIELD_LOC(511, 480) ++ ++#define RCWQE_MW_TYPE RCWQE_FIELD_LOC(256, 256) ++#define RCWQE_MW_RA_EN RCWQE_FIELD_LOC(258, 258) ++#define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259) ++#define RCWQE_MW_RW_EN 
RCWQE_FIELD_LOC(260, 260) + + struct hns_roce_v2_wqe_data_seg { + __le32 len; +@@ -323,60 +314,51 @@ struct hns_roce_ud_sq_wqe { + uint8_t dgid[HNS_ROCE_GID_SIZE]; + }; + +-#define UD_SQ_WQE_OPCODE_S 0 +-#define UD_SQ_WQE_OPCODE_M GENMASK(4, 0) +- +-#define UD_SQ_WQE_OWNER_S 7 +- +-#define UD_SQ_WQE_CQE_S 8 +- +-#define UD_SQ_WQE_SE_S 11 +- +-#define UD_SQ_WQE_PD_S 0 +-#define UD_SQ_WQE_PD_M GENMASK(23, 0) +- +-#define UD_SQ_WQE_SGE_NUM_S 24 +-#define UD_SQ_WQE_SGE_NUM_M GENMASK(31, 24) +- +-#define UD_SQ_WQE_MSG_START_SGE_IDX_S 0 +-#define UD_SQ_WQE_MSG_START_SGE_IDX_M GENMASK(23, 0) +- +-#define UD_SQ_WQE_UDP_SPN_S 16 +-#define UD_SQ_WQE_UDP_SPN_M GENMASK(31, 16) +- +-#define UD_SQ_WQE_DQPN_S 0 +-#define UD_SQ_WQE_DQPN_M GENMASK(23, 0) +- +-#define UD_SQ_WQE_VLAN_S 0 +-#define UD_SQ_WQE_VLAN_M GENMASK(15, 0) +- +-#define UD_SQ_WQE_HOPLIMIT_S 16 +-#define UD_SQ_WQE_HOPLIMIT_M GENMASK(23, 16) +- +-#define UD_SQ_WQE_TCLASS_S 24 +-#define UD_SQ_WQE_TCLASS_M GENMASK(31, 24) +- +-#define UD_SQ_WQE_FLOW_LABEL_S 0 +-#define UD_SQ_WQE_FLOW_LABEL_M GENMASK(19, 0) +- +-#define UD_SQ_WQE_SL_S 20 +-#define UD_SQ_WQE_SL_M GENMASK(23, 20) +- +-#define UD_SQ_WQE_VLAN_EN_S 30 +- +-#define UD_SQ_WQE_LBI_S 31 +- +-#define UD_SQ_WQE_BYTE_4_INL_S 12 +-#define UD_SQ_WQE_BYTE_20_INL_TYPE_S 31 +- +-#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S 16 +-#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M GENMASK(31, 16) +-#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S 24 +-#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M GENMASK(31, 24) +-#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S 0 +-#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M GENMASK(23, 0) +-#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S 0 +-#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M GENMASK(15, 0) ++#define UDWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ud_sq_wqe, h, l) ++ ++#define UDWQE_OPCODE UDWQE_FIELD_LOC(4, 0) ++#define UDWQE_DB_SL_L UDWQE_FIELD_LOC(6, 5) ++#define UDWQE_OWNER UDWQE_FIELD_LOC(7, 7) ++#define UDWQE_CQE UDWQE_FIELD_LOC(8, 8) ++#define UDWQE_RSVD1 UDWQE_FIELD_LOC(10, 9) ++#define UDWQE_SE UDWQE_FIELD_LOC(11, 11) ++#define UDWQE_INLINE UDWQE_FIELD_LOC(12, 12) ++#define UDWQE_DB_SL_H UDWQE_FIELD_LOC(14, 13) ++#define UDWQE_WQE_IDX UDWQE_FIELD_LOC(30, 15) ++#define UDWQE_FLAG UDWQE_FIELD_LOC(31, 31) ++#define UDWQE_MSG_LEN UDWQE_FIELD_LOC(63, 32) ++#define UDWQE_IMMTDATA UDWQE_FIELD_LOC(95, 64) ++#define UDWQE_PD UDWQE_FIELD_LOC(119, 96) ++#define UDWQE_SGE_NUM UDWQE_FIELD_LOC(127, 120) ++#define UDWQE_MSG_START_SGE_IDX UDWQE_FIELD_LOC(151, 128) ++#define UDWQE_RSVD3 UDWQE_FIELD_LOC(158, 152) ++#define UDWQE_INLINE_TYPE UDWQE_FIELD_LOC(159, 159) ++#define UDWQE_RSVD4 UDWQE_FIELD_LOC(175, 160) ++#define UDWQE_UDPSPN UDWQE_FIELD_LOC(191, 176) ++#define UDWQE_QKEY UDWQE_FIELD_LOC(223, 192) ++#define UDWQE_DQPN UDWQE_FIELD_LOC(247, 224) ++#define UDWQE_RSVD5 UDWQE_FIELD_LOC(255, 248) ++#define UDWQE_VLAN UDWQE_FIELD_LOC(271, 256) ++#define UDWQE_HOPLIMIT UDWQE_FIELD_LOC(279, 272) ++#define UDWQE_TCLASS UDWQE_FIELD_LOC(287, 280) ++#define UDWQE_FLOW_LABEL UDWQE_FIELD_LOC(307, 288) ++#define UDWQE_SL UDWQE_FIELD_LOC(311, 308) ++#define UDWQE_PORTN UDWQE_FIELD_LOC(314, 312) ++#define UDWQE_RSVD6 UDWQE_FIELD_LOC(317, 315) ++#define UDWQE_UD_VLAN_EN UDWQE_FIELD_LOC(318, 318) ++#define UDWQE_LBI UDWQE_FIELD_LOC(319, 319) ++#define UDWQE_DMAC_L UDWQE_FIELD_LOC(351, 320) ++#define UDWQE_DMAC_H UDWQE_FIELD_LOC(367, 352) ++#define UDWQE_GMV_IDX UDWQE_FIELD_LOC(383, 368) ++#define UDWQE_DGID0 UDWQE_FIELD_LOC(415, 384) ++#define UDWQE_DGID1 UDWQE_FIELD_LOC(447, 416) ++#define UDWQE_DGID2 
UDWQE_FIELD_LOC(479, 448) ++#define UDWQE_DGID3 UDWQE_FIELD_LOC(511, 480) ++ ++#define UDWQE_INLINE_DATA_15_0 UDWQE_FIELD_LOC(63, 48) ++#define UDWQE_INLINE_DATA_23_16 UDWQE_FIELD_LOC(127, 120) ++#define UDWQE_INLINE_DATA_47_24 UDWQE_FIELD_LOC(151, 128) ++#define UDWQE_INLINE_DATA_63_48 UDWQE_FIELD_LOC(175, 160) + + #define MAX_SERVICE_LEVEL 0x7 + +-- +2.27.0 + diff --git a/0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch b/0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch new file mode 100644 index 0000000..8fecc95 --- /dev/null +++ b/0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch @@ -0,0 +1,186 @@ +From 49263de90f77f218710ef45bc0377d3e2019d811 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Sat, 25 Dec 2021 17:42:54 +0800 +Subject: libhns: Use new interfaces hr reg ***() to operate the DB field + +Use hr_reg_xxx() to simply the codes for filling fields. + +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 59 ++++++++++++++------------------ + providers/hns/hns_roce_u_hw_v2.h | 30 ++++++---------- + 2 files changed, 35 insertions(+), 54 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 0cff12b..e7dec0b 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -291,10 +291,9 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx, + { + struct hns_roce_db rq_db = {}; + +- rq_db.byte_4 = htole32(qpn); +- roce_set_field(rq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, +- HNS_ROCE_V2_RQ_DB); +- rq_db.parameter = htole32(rq_head); ++ hr_reg_write(&rq_db, DB_TAG, qpn); ++ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB); ++ hr_reg_write(&rq_db, DB_PI, rq_head); + + hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db); + } +@@ -304,12 +303,11 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx, + { + struct hns_roce_db sq_db = {}; + +- sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num); +- roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, +- HNS_ROCE_V2_SQ_DB); ++ hr_reg_write(&sq_db, DB_TAG, qp->verbs_qp.qp.qp_num); ++ hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB); ++ hr_reg_write(&sq_db, DB_PI, qp->sq.head); ++ hr_reg_write(&sq_db, DB_SL, qp->sl); + +- sq_db.parameter = htole32(qp->sq.head); +- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl); + hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db); + } + +@@ -336,14 +334,10 @@ static void update_cq_db(struct hns_roce_context *ctx, + { + struct hns_roce_db cq_db = {}; + +- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn); +- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, +- HNS_ROCE_V2_CQ_DB_PTR); +- +- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M, +- DB_PARAM_CQ_CONSUMER_IDX_S, cq->cons_index); +- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M, +- DB_PARAM_CQ_CMD_SN_S, 1); ++ hr_reg_write(&cq_db, DB_TAG, cq->cqn); ++ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_PTR); ++ hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index); ++ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1); + + hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); + } +@@ -663,7 +657,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, + + if (npolled || err == V2_CQ_POLL_ERR) { + if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) +- *cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M; ++ *cq->db = cq->cons_index & RECORD_DB_CI_MASK; + else + update_cq_db(ctx, cq); + } +@@ -679,24 
+673,17 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) + struct hns_roce_cq *cq = to_hr_cq(ibvcq); + struct hns_roce_db cq_db = {}; + uint32_t solicited_flag; +- uint32_t cmd_sn; + uint32_t ci; + + ci = cq->cons_index & ((cq->cq_depth << 1) - 1); +- cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK; + solicited_flag = solicited ? HNS_ROCE_V2_CQ_DB_REQ_SOL : + HNS_ROCE_V2_CQ_DB_REQ_NEXT; + +- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn); +- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, +- HNS_ROCE_V2_CQ_DB_NTR); +- +- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M, +- DB_PARAM_CQ_CONSUMER_IDX_S, ci); +- +- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M, +- DB_PARAM_CQ_CMD_SN_S, cmd_sn); +- roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag); ++ hr_reg_write(&cq_db, DB_TAG, cq->cqn); ++ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_NTR); ++ hr_reg_write(&cq_db, DB_CQ_CI, ci); ++ hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn); ++ hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag); + + hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db); + +@@ -1626,6 +1613,13 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) + idx_que->head++; + } + ++static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq) ++{ ++ hr_reg_write(db, DB_TAG, srq->srqn); ++ hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB); ++ hr_reg_write(db, DB_PI, srq->idx_que.head); ++} ++ + static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +@@ -1665,10 +1659,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + */ + udma_to_device_barrier(); + +- srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S | +- srq->srqn); +- srq_db.parameter = htole32(srq->idx_que.head & +- DB_PARAM_SRQ_PRODUCER_COUNTER_M); ++ update_srq_db(&srq_db, srq); + + hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, + (__le32 *)&srq_db); +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index 4330b7d..e91b1f7 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -41,8 +41,6 @@ enum { + #define HNS_ROCE_V2_CQ_DB_REQ_SOL 1 + #define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0 + +-#define HNS_ROCE_CMDSN_MASK 0x3 +- + #define HNS_ROCE_SL_SHIFT 2 + + /* V2 REG DEFINITION */ +@@ -127,27 +125,19 @@ struct hns_roce_db { + __le32 byte_4; + __le32 parameter; + }; +-#define DB_BYTE_4_TAG_S 0 +-#define DB_BYTE_4_TAG_M GENMASK(23, 0) +- +-#define DB_BYTE_4_CMD_S 24 +-#define DB_BYTE_4_CMD_M GENMASK(27, 24) +- +-#define DB_BYTE_4_FLAG_S 31 +- +-#define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0 +-#define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0) +- +-#define DB_PARAM_SL_S 16 +-#define DB_PARAM_SL_M GENMASK(18, 16) + +-#define DB_PARAM_CQ_CONSUMER_IDX_S 0 +-#define DB_PARAM_CQ_CONSUMER_IDX_M GENMASK(23, 0) ++#define DB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_db, h, l) + +-#define DB_PARAM_CQ_NOTIFY_S 24 ++#define DB_TAG DB_FIELD_LOC(23, 0) ++#define DB_CMD DB_FIELD_LOC(27, 24) ++#define DB_FLAG DB_FIELD_LOC(31, 31) ++#define DB_PI DB_FIELD_LOC(47, 32) ++#define DB_SL DB_FIELD_LOC(50, 48) ++#define DB_CQ_CI DB_FIELD_LOC(55, 32) ++#define DB_CQ_NOTIFY DB_FIELD_LOC(56, 56) ++#define DB_CQ_CMD_SN DB_FIELD_LOC(58, 57) + +-#define DB_PARAM_CQ_CMD_SN_S 25 +-#define DB_PARAM_CQ_CMD_SN_M GENMASK(26, 25) ++#define RECORD_DB_CI_MASK GENMASK(23, 0) + + struct hns_roce_v2_cqe { + __le32 byte_4; +-- 
+2.27.0 + diff --git a/0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch b/0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch new file mode 100644 index 0000000..27adaf6 --- /dev/null +++ b/0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch @@ -0,0 +1,306 @@ +From 2da2a94f0ef5b6cf7fb8eacee1814a418d9bde74 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Sat, 25 Dec 2021 17:42:53 +0800 +Subject: libhns: Add new interfaces hr_reg_xxx() to operate the CQE field + +Implement hr_reg_xxx() to simplify the code for filling or extracting +fields. + +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u.h | 53 +++++++++++++++++++++++++ + providers/hns/hns_roce_u_hw_v2.c | 58 ++++++++++------------------ + providers/hns/hns_roce_u_hw_v2.h | 66 ++++++++++++-------------------- + 3 files changed, 98 insertions(+), 79 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index c1ae1c9..df7f485 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -101,6 +101,59 @@ + #define roce_set_bit(origin, shift, val) \ + roce_set_field((origin), (1ul << (shift)), (shift), (val)) + ++#define FIELD_LOC(field_type, field_h, field_l) \ ++ field_type, field_h, \ ++ field_l + BUILD_ASSERT_OR_ZERO(((field_h) / 32) == \ ++ ((field_l) / 32)) ++ ++#define _hr_reg_enable(ptr, field_type, field_h, field_l) \ ++ ({ \ ++ const field_type *_ptr = ptr; \ ++ BUILD_ASSERT((field_h) == (field_l)); \ ++ *((__le32 *)_ptr + (field_h) / 32) |= \ ++ htole32(BIT((field_l) % 32)); \ ++ }) ++ ++ hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) ++ ++#define _hr_reg_clear(ptr, field_type, field_h, field_l) \ ++ ({ \ ++ const field_type *_ptr = ptr; \ ++ BUILD_ASSERT((field_h) >= (field_l)); \ ++ *((__le32 *)_ptr + (field_h) / 32) &= \ ++ ~htole32(GENMASK((field_h) % 32, (field_l) % 32)); \ ++ }) ++ ++#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field) ++ ++#define _hr_reg_write_bool(ptr, field_type, field_h, field_l, val) \ ++ ({ \ ++ (val) ?
_hr_reg_enable(ptr, field_type, field_h, field_l) : \ ++ _hr_reg_clear(ptr, field_type, field_h, field_l);\ ++ }) ++ ++#define hr_reg_write_bool(ptr, field, val) _hr_reg_write_bool(ptr, field, val) ++ ++#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \ ++ ({ \ ++ const uint32_t _val = val; \ ++ _hr_reg_clear(ptr, field_type, field_h, field_l); \ ++ *((__le32 *)ptr + (field_h) / 32) |= htole32(FIELD_PREP( \ ++ GENMASK((field_h) % 32, (field_l) % 32), _val)); \ ++ }) ++ ++#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) ++ ++#define _hr_reg_read(ptr, field_type, field_h, field_l) \ ++ ({ \ ++ const field_type *_ptr = ptr; \ ++ BUILD_ASSERT((field_h) >= (field_l)); \ ++ FIELD_GET(GENMASK((field_h) % 32, (field_l) % 32), \ ++ le32toh(*((__le32 *)_ptr + (field_h) / 32))); \ ++ }) ++ ++#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) ++ + enum { + HNS_ROCE_QP_TABLE_BITS = 8, + HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS, +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index e7dec0b..558457a 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -187,8 +187,7 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + } + } + +- wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M, +- CQE_BYTE_16_SUB_STATUS_S); ++ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS); + } + + static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry) +@@ -200,8 +199,8 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n) + { + struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe); + +- return (!!(roce_get_bit(cqe->byte_4, CQE_BYTE_4_OWNER_S)) ^ +- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL; ++ return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ? ++ cqe : NULL; + } + + static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) +@@ -257,8 +256,7 @@ static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe, + uint32_t srqn; + + if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) { +- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M, +- CQE_BYTE_12_XRC_SRQN_S); ++ srqn = hr_reg_read(cqe, CQE_XRC_SRQN); + + *srq = hns_roce_find_srq(ctx, srqn); + if (!*srq) +@@ -438,15 +436,13 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + (opcode == HNS_ROCE_RECV_OP_SEND || + opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM || + opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) && +- (roce_get_bit(cqe->byte_4, CQE_BYTE_4_RQ_INLINE_S))) { ++ hr_reg_read(cqe, CQE_RQ_INLINE)) { + struct hns_roce_rinl_sge *sge_list; + uint32_t wr_num, wr_cnt, sge_num, data_len; + uint8_t *wqe_buf; + uint32_t sge_cnt, size; + +- wr_num = (uint16_t)roce_get_field(cqe->byte_4, +- CQE_BYTE_4_WQE_IDX_M, +- CQE_BYTE_4_WQE_IDX_S) & 0xffff; ++ wr_num = hr_reg_read(cqe, CQE_WQE_IDX); + wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1); + + sge_list = (*cur_qp)->rq_rinl_buf.wqe_list[wr_cnt].sg_list; +@@ -477,13 +473,10 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + + static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc) + { +- wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M, +- CQE_BYTE_32_SL_S); +- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M, +- CQE_BYTE_32_RMT_QPN_S); ++ wc->sl = hr_reg_read(cqe, CQE_SL); ++ wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN); + wc->slid = 0; +- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ? 
+- IBV_WC_GRH : 0; ++ wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IBV_WC_GRH : 0; + wc->pkey_index = 0; + } + +@@ -492,8 +485,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + { + uint32_t wqe_idx; + +- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, +- CQE_BYTE_4_WQE_IDX_S); ++ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); + wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)]; + hns_roce_free_srq_wqe(srq, wqe_idx); + } +@@ -533,8 +525,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, + * according to the wqe idx in the current cqe first + */ + if (hr_qp->sq_signal_bits) { +- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M, +- CQE_BYTE_4_WQE_IDX_S); ++ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX); + /* get the processed wqes num since last signalling */ + wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1); + } +@@ -590,8 +581,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + + udma_from_device_barrier(); + +- qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M, +- CQE_BYTE_16_LCL_QPN_S); ++ qpn = hr_reg_read(cqe, CQE_LCL_QPN); + + /* if cur qp is null, then could not get the correct qpn */ + if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) { +@@ -600,11 +590,9 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq, + return V2_CQ_POLL_ERR; + } + +- status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M, +- CQE_BYTE_4_STATUS_S); +- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M, +- CQE_BYTE_4_OPCODE_S); +- is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ; ++ status = hr_reg_read(cqe, CQE_STATUS); ++ opcode = hr_reg_read(cqe, CQE_OPCODE); ++ is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ; + if (is_send) { + parse_cqe_for_req(cqe, wc, *cur_qp, opcode); + } else { +@@ -1350,26 +1338,20 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn, + + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe); +- if (roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M, +- CQE_BYTE_16_LCL_QPN_S) == qpn) { +- is_recv_cqe = roce_get_bit(cqe->byte_4, +- CQE_BYTE_4_S_R_S); ++ if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) { ++ is_recv_cqe = hr_reg_read(cqe, CQE_S_R); + + if (srq && is_recv_cqe) { +- wqe_index = roce_get_field(cqe->byte_4, +- CQE_BYTE_4_WQE_IDX_M, +- CQE_BYTE_4_WQE_IDX_S); ++ wqe_index = hr_reg_read(cqe, CQE_WQE_IDX); + hns_roce_free_srq_wqe(srq, wqe_index); + } + ++nfreed; + } else if (nfreed) { + dest = get_cqe_v2(cq, + (prod_index + nfreed) & cq->ibv_cq.cqe); +- owner_bit = roce_get_bit(dest->byte_4, +- CQE_BYTE_4_OWNER_S); ++ owner_bit = hr_reg_read(dest, CQE_OWNER); + memcpy(dest, cqe, cq->cqe_size); +- roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S, +- owner_bit); ++ hr_reg_write_bool(dest, CQE_OWNER, owner_bit); + } + } + +diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h +index e91b1f7..92e5f1a 100644 +--- a/providers/hns/hns_roce_u_hw_v2.h ++++ b/providers/hns/hns_roce_u_hw_v2.h +@@ -154,47 +154,31 @@ struct hns_roce_v2_cqe { + __le32 rsv[8]; + }; + +-#define CQE_BYTE_4_OPCODE_S 0 +-#define CQE_BYTE_4_OPCODE_M (((1UL << 5) - 1) << CQE_BYTE_4_OPCODE_S) +- +-#define CQE_BYTE_4_RQ_INLINE_S 5 +- +-#define CQE_BYTE_4_S_R_S 6 +-#define CQE_BYTE_4_OWNER_S 7 +- +-#define CQE_BYTE_4_STATUS_S 8 +-#define CQE_BYTE_4_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_4_STATUS_S) +- +-#define CQE_BYTE_4_WQE_IDX_S 16 +-#define CQE_BYTE_4_WQE_IDX_M (((1UL << 16) - 1) << 
CQE_BYTE_4_WQE_IDX_S) +- +-#define CQE_BYTE_12_XRC_SRQN_S 0 +-#define CQE_BYTE_12_XRC_SRQN_M (((1UL << 24) - 1) << CQE_BYTE_12_XRC_SRQN_S) +- +-#define CQE_BYTE_16_LCL_QPN_S 0 +-#define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S) +- +-#define CQE_BYTE_16_SUB_STATUS_S 24 +-#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S) +- +-#define CQE_BYTE_28_SMAC_S 0 +-#define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S) +- +-#define CQE_BYTE_28_PORT_TYPE_S 16 +-#define CQE_BYTE_28_PORT_TYPE_M (((1UL << 2) - 1) << CQE_BYTE_28_PORT_TYPE_S) +- +-#define CQE_BYTE_32_RMT_QPN_S 0 +-#define CQE_BYTE_32_RMT_QPN_M (((1UL << 24) - 1) << CQE_BYTE_32_RMT_QPN_S) +- +-#define CQE_BYTE_32_SL_S 24 +-#define CQE_BYTE_32_SL_M (((1UL << 3) - 1) << CQE_BYTE_32_SL_S) +- +-#define CQE_BYTE_32_PORTN_S 27 +-#define CQE_BYTE_32_PORTN_M (((1UL << 3) - 1) << CQE_BYTE_32_PORTN_S) +- +-#define CQE_BYTE_32_GRH_S 30 +- +-#define CQE_BYTE_32_LPK_S 31 ++#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l) ++ ++#define CQE_OPCODE CQE_FIELD_LOC(4, 0) ++#define CQE_RQ_INLINE CQE_FIELD_LOC(5, 5) ++#define CQE_S_R CQE_FIELD_LOC(6, 6) ++#define CQE_OWNER CQE_FIELD_LOC(7, 7) ++#define CQE_STATUS CQE_FIELD_LOC(15, 8) ++#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16) ++#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32) ++#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64) ++#define CQE_RSV0 CQE_FIELD_LOC(95, 88) ++#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96) ++#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120) ++#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128) ++#define CQE_SMAC CQE_FIELD_LOC(207, 160) ++#define CQE_PORT_TYPE CQE_FIELD_LOC(209, 208) ++#define CQE_VID CQE_FIELD_LOC(221, 210) ++#define CQE_VID_VLD CQE_FIELD_LOC(222, 222) ++#define CQE_RSV2 CQE_FIELD_LOC(223, 223) ++#define CQE_RMT_QPN CQE_FIELD_LOC(247, 224) ++#define CQE_SL CQE_FIELD_LOC(250, 248) ++#define CQE_PORTN CQE_FIELD_LOC(253, 251) ++#define CQE_GRH CQE_FIELD_LOC(254, 254) ++#define CQE_LPK CQE_FIELD_LOC(255, 255) ++#define CQE_RSV3 CQE_FIELD_LOC(511, 256) + + struct hns_roce_rc_sq_wqe { + __le32 byte_4; +-- +2.27.0 + diff --git a/0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch b/0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch new file mode 100644 index 0000000..fdfab1a --- /dev/null +++ b/0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch @@ -0,0 +1,202 @@ +From 48e8ca01b1e5d033fca6e988d2d280846c95d7e1 Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Fri, 31 Dec 2021 18:01:06 +0800 +Subject: libhns: Fix the calculation of QP/SRQ table size + +The table_size means the maximum number of QP/SRQ. This value may not be +a power of two. The old algorithm will lead to a result that allocates a +mismatched table. 
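+
+As a rough sketch of the corrected math (illustrative only, not part of
+this patch; ilog32_ceil() below is a stand-in for the provider's
+hr_ilog32() helper, and the sample size is made up):
+
+	#include <stdint.h>
+
+	/* smallest shift such that (1UL << shift) >= count */
+	static uint32_t ilog32_ceil(uint32_t count)
+	{
+		uint32_t shift = 0;
+
+		while ((1UL << shift) < count)
+			shift++;
+		return shift;
+	}
+
+	static uint32_t calc_table_shift(uint32_t entry_count,
+					 uint32_t size_shift)
+	{
+		uint32_t count_shift = ilog32_ceil(entry_count);
+
+		/* The low count_shift - size_shift bits index entries in
+		 * one second-level table; the remaining size_shift bits
+		 * pick one of the 2^size_shift first-level slots. */
+		return count_shift > size_shift ?
+		       count_shift - size_shift : 0;
+	}
+
+For example, a qp_tab_size of 1000000 is not a power of two: it rounds
+up to 2^20, so with 8 first-level bits the shift is 12 and the two-level
+table covers all qp_tab_size entries, whereas the ffs()-based formula
+below computes a bogus shift from the lowest set bit.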
+ +Fixes: 887b78c80224 ("libhns: Add initial main frame") +Fixes: 9e3df7578153 ("libhns: Support ibv_create_srq_ex") +Signed-off-by: Wenpeng Liang +Signed-off-by: Leon Romanovsky +--- + providers/hns/hns_roce_u.c | 18 +++++++++++------- + providers/hns/hns_roce_u.h | 20 ++++++++++++++------ + providers/hns/hns_roce_u_hw_v1.c | 4 ++-- + providers/hns/hns_roce_u_hw_v2.c | 4 ++-- + providers/hns/hns_roce_u_verbs.c | 9 ++++----- + 5 files changed, 33 insertions(+), 22 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index 9dc4905..6eac4ff 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -92,6 +92,13 @@ static const struct verbs_context_ops hns_common_ops = { + .get_srq_num = hns_roce_u_get_srq_num, + }; + ++static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) ++{ ++ uint32_t count_shift = hr_ilog32(entry_count); ++ ++ return count_shift > size_shift ? count_shift - size_shift : 0; ++} ++ + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +@@ -120,18 +127,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + else + context->cqe_size = HNS_ROCE_V3_CQE_SIZE; + +- context->num_qps = resp.qp_tab_size; +- context->num_srqs = resp.srq_tab_size; +- +- context->qp_table_shift = ffs(context->num_qps) - 1 - +- HNS_ROCE_QP_TABLE_BITS; ++ context->qp_table_shift = calc_table_shift(resp.qp_tab_size, ++ HNS_ROCE_QP_TABLE_BITS); + context->qp_table_mask = (1 << context->qp_table_shift) - 1; + pthread_mutex_init(&context->qp_table_mutex, NULL); + for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i) + context->qp_table[i].refcnt = 0; + +- context->srq_table_shift = ffs(context->num_srqs) - 1 - +- HNS_ROCE_SRQ_TABLE_BITS; ++ context->srq_table_shift = calc_table_shift(resp.srq_tab_size, ++ HNS_ROCE_SRQ_TABLE_BITS); + context->srq_table_mask = (1 << context->srq_table_shift) - 1; + pthread_mutex_init(&context->srq_table_mutex, NULL); + for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i) +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index df7f485..9366923 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -154,10 +154,8 @@ + + #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) + +-enum { +- HNS_ROCE_QP_TABLE_BITS = 8, +- HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS, +-}; ++#define HNS_ROCE_QP_TABLE_BITS 8 ++#define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS) + + #define HNS_ROCE_SRQ_TABLE_BITS 8 + #define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS) +@@ -211,7 +209,6 @@ struct hns_roce_context { + int refcnt; + } qp_table[HNS_ROCE_QP_TABLE_SIZE]; + pthread_mutex_t qp_table_mutex; +- uint32_t num_qps; + uint32_t qp_table_shift; + uint32_t qp_table_mask; + +@@ -220,7 +217,6 @@ struct hns_roce_context { + int refcnt; + } srq_table[HNS_ROCE_SRQ_TABLE_SIZE]; + pthread_mutex_t srq_table_mutex; +- uint32_t num_srqs; + uint32_t srq_table_shift; + uint32_t srq_table_mask; + +@@ -382,6 +378,18 @@ static inline unsigned int hr_ilog32(unsigned int count) + return ilog32(count - 1); + } + ++static inline uint32_t to_hr_qp_table_index(uint32_t qpn, ++ struct hns_roce_context *ctx) ++{ ++ return (qpn >> ctx->qp_table_shift) & (HNS_ROCE_QP_TABLE_SIZE - 1); ++} ++ ++static inline uint32_t to_hr_srq_table_index(uint32_t srqn, ++ struct hns_roce_context *ctx) ++{ ++ return (srqn >> ctx->srq_table_shift) & (HNS_ROCE_SRQ_TABLE_SIZE - 1); ++} ++ + static inline struct hns_roce_device 
*to_hr_dev(struct ibv_device *ibv_dev) + { + return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device); +diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c +index 838e004..28ad482 100644 +--- a/providers/hns/hns_roce_u_hw_v1.c ++++ b/providers/hns/hns_roce_u_hw_v1.c +@@ -220,7 +220,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq, + static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, + uint32_t qpn) + { +- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; ++ uint32_t tind = to_hr_qp_table_index(qpn, ctx); + + if (ctx->qp_table[tind].refcnt) { + return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; +@@ -232,7 +232,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx, + + static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn) + { +- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; ++ uint32_t tind = to_hr_qp_table_index(qpn, ctx); + + if (!--ctx->qp_table[tind].refcnt) + free(ctx->qp_table[tind].table); +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 558457a..e39ee7f 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -343,7 +343,7 @@ static void update_cq_db(struct hns_roce_context *ctx, + static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, + uint32_t qpn) + { +- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; ++ uint32_t tind = to_hr_qp_table_index(qpn, ctx); + + if (ctx->qp_table[tind].refcnt) + return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; +@@ -354,7 +354,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx, + void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp) + { + uint32_t qpn = qp->verbs_qp.qp.qp_num; +- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; ++ uint32_t tind = to_hr_qp_table_index(qpn, ctx); + + pthread_mutex_lock(&ctx->qp_table_mutex); + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 557d075..5ccb701 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -431,8 +431,7 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq) + static int hns_roce_store_srq(struct hns_roce_context *ctx, + struct hns_roce_srq *srq) + { +- uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >> +- ctx->srq_table_shift; ++ uint32_t tind = to_hr_srq_table_index(srq->srqn, ctx); + + pthread_mutex_lock(&ctx->srq_table_mutex); + +@@ -457,7 +456,7 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx, + struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx, + uint32_t srqn) + { +- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift; ++ uint32_t tind = to_hr_srq_table_index(srqn, ctx); + + if (ctx->srq_table[tind].refcnt) + return ctx->srq_table[tind].table[srqn & ctx->srq_table_mask]; +@@ -467,7 +466,7 @@ struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx, + + static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn) + { +- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift; ++ uint32_t tind = to_hr_srq_table_index(srqn, ctx); + + pthread_mutex_lock(&ctx->srq_table_mutex); + +@@ -1108,7 +1107,7 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx, + struct hns_roce_qp *qp) + { + uint32_t qpn = qp->verbs_qp.qp.qp_num; +- uint32_t tind = (qpn & (ctx->num_qps - 1)) 
>> ctx->qp_table_shift; ++ uint32_t tind = to_hr_qp_table_index(qpn, ctx); + + pthread_mutex_lock(&ctx->qp_table_mutex); + if (!ctx->qp_table[tind].refcnt) { +-- +2.27.0 + diff --git a/0037-libhns-Fix-wrong-HIP08-version-macro.patch b/0037-libhns-Fix-wrong-HIP08-version-macro.patch new file mode 100644 index 0000000..955e2e6 --- /dev/null +++ b/0037-libhns-Fix-wrong-HIP08-version-macro.patch @@ -0,0 +1,31 @@ +From d4766cd11b985f7f798410129a0b204beb13ecef Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Mon, 17 Jan 2022 20:43:39 +0800 +Subject: libhns: Fix wrong HIP08 version macro + +The version macro of HIP08 should be consistent with the version number +queried from the hardware. + +Fixes: b8cb140e9cd6 ("libhns: Refresh version info before using it") +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h +index 9366923..2b4ba18 100644 +--- a/providers/hns/hns_roce_u.h ++++ b/providers/hns/hns_roce_u.h +@@ -48,8 +48,7 @@ + #include "hns_roce_u_abi.h" + + #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') +- +-#define HNS_ROCE_HW_VER2 ('h' << 24 | 'i' << 16 | '0' << 8 | '8') ++#define HNS_ROCE_HW_VER2 0x100 + #define HNS_ROCE_HW_VER3 0x130 + + #define PFX "hns: " +-- +2.27.0 + diff --git a/0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch b/0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch new file mode 100644 index 0000000..23e1c0f --- /dev/null +++ b/0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch @@ -0,0 +1,115 @@ +From 203675526b14d9556eeb4212536ebcfc81691c1b Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Mon, 17 Jan 2022 20:43:38 +0800 +Subject: libhns: Fix out-of-bounds write when filling inline data into + extended sge space + +If the buf to store inline data is in the last page of the extended sge +space, filling the entire inline data into the extended sge space at one +time may result in out-of-bounds writing. + +When the remaining space at the end of the extended sge is not enough to +accommodate the entire inline data, the inline data needs to be filled +into the extended sge space in two steps: +(1) The front part of the inline data is filled into the remaining space + at the end of the extended sge. +(2) The remaining inline data is filled into the header space of the + extended sge. 
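+
+The copy is the standard ring-buffer split. A minimal standalone sketch
+(illustrative only; copy_to_ring() and its parameters are made-up names,
+while the patch below applies the same scheme per sge):
+
+	#include <stdint.h>
+	#include <string.h>
+
+	/* Copy len bytes into a circular buffer of ring_size bytes,
+	 * starting at offset (offset < ring_size, len <= ring_size). */
+	static void copy_to_ring(uint8_t *ring, uint32_t ring_size,
+				 uint32_t offset, const void *src,
+				 uint32_t len)
+	{
+		uint32_t tail_len = ring_size - offset;
+
+		if (len <= tail_len) {
+			memcpy(ring + offset, src, len);
+		} else {
+			/* step (1): front part into the tail space */
+			memcpy(ring + offset, src, tail_len);
+			/* step (2): remainder wraps to the head */
+			memcpy(ring, (const uint8_t *)src + tail_len,
+			       len - tail_len);
+		}
+	}
+
+The caller must bound len by the buffer size first, just as the driver
+checks the total inline length against the extended sge space before
+copying.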
+ +Fixes: b7814b7b9715("libhns: Support inline data in extented sge space for RC") +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 40 ++++++++++++++++++++++---------- + 1 file changed, 28 insertions(+), 12 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index e39ee7f..20745dc 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -772,21 +772,43 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + struct hns_roce_sge_info *sge_info) + { + unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg); +- void *dseg; ++ unsigned int sge_mask = qp->ex_sge.sge_cnt - 1; ++ void *dst_addr, *src_addr, *tail_bound_addr; ++ uint32_t src_len, tail_len; + int i; + ++ + if (sge_info->total_len > qp->sq.max_gs * sge_sz) + return EINVAL; + +- dseg = get_send_sge_ex(qp, sge_info->start_idx); ++ dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask); ++ tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask); + + for (i = 0; i < wr->num_sge; i++) { +- memcpy(dseg, (void *)(uintptr_t)wr->sg_list[i].addr, +- wr->sg_list[i].length); +- dseg += wr->sg_list[i].length; ++ tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr; ++ ++ src_addr = (void *)(uintptr_t)wr->sg_list[i].addr; ++ src_len = wr->sg_list[i].length; ++ ++ if (src_len < tail_len) { ++ memcpy(dst_addr, src_addr, src_len); ++ dst_addr += src_len; ++ } else if (src_len == tail_len) { ++ memcpy(dst_addr, src_addr, src_len); ++ dst_addr = get_send_sge_ex(qp, 0); ++ } else { ++ memcpy(dst_addr, src_addr, tail_len); ++ dst_addr = get_send_sge_ex(qp, 0); ++ src_addr += tail_len; ++ src_len -= tail_len; ++ ++ memcpy(dst_addr, src_addr, src_len); ++ dst_addr += src_len; ++ } + } + +- sge_info->start_idx += DIV_ROUND_UP(sge_info->total_len, sge_sz); ++ sge_info->valid_num = DIV_ROUND_UP(sge_info->total_len, sge_sz); ++ sge_info->start_idx += sge_info->valid_num; + + return 0; + } +@@ -828,7 +850,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + struct hns_roce_ud_sq_wqe *ud_sq_wqe, + struct hns_roce_sge_info *sge_info) + { +- unsigned int sge_idx = sge_info->start_idx; + int ret; + + if (!check_inl_data_len(qp, sge_info->total_len)) +@@ -845,8 +866,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + if (ret) + return ret; + +- sge_info->valid_num = sge_info->start_idx - sge_idx; +- + hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num); + } + +@@ -969,7 +988,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + struct hns_roce_rc_sq_wqe *rc_sq_wqe, + struct hns_roce_sge_info *sge_info) + { +- unsigned int sge_idx = sge_info->start_idx; + void *dseg = rc_sq_wqe; + int ret; + int i; +@@ -997,8 +1015,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, + if (ret) + return ret; + +- sge_info->valid_num = sge_info->start_idx - sge_idx; +- + hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num); + } + +-- +2.27.0 + diff --git a/0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch b/0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch new file mode 100644 index 0000000..954be02 --- /dev/null +++ b/0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch @@ -0,0 +1,171 @@ +From 85a5aa79327f45e4bea8d7ad0e55842225ca676a Mon Sep 17 00:00:00 2001 +From: Wenpeng Liang +Date: Tue, 18 Jan 2022 19:58:51 +0800 +Subject: libhns: Clear remaining unused sges when post recv + +The 
HIP09 requires the driver to clear the unused data segments in wqe +buffer to make the hns ROCEE stop reading the remaining invalid sges for +RQ. + +Signed-off-by: Wenpeng Liang +--- + providers/hns/hns_roce_u_hw_v2.c | 88 ++++++++++++++------------------ + 1 file changed, 39 insertions(+), 49 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 20745dc..6b0d7f1 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -85,14 +85,6 @@ static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, + dseg->len = htole32(sg->length); + } + +-/* Fill an ending sge to make hw stop reading the remaining sges in wqe */ +-static inline void set_ending_data_seg(struct hns_roce_v2_wqe_data_seg *dseg) +-{ +- dseg->lkey = htole32(0x0); +- dseg->addr = 0; +- dseg->len = htole32(INVALID_SGE_LENGTH); +-} +- + static void set_extend_atomic_seg(struct hns_roce_qp *qp, unsigned int sge_cnt, + struct hns_roce_sge_info *sge_info, void *buf) + { +@@ -1247,23 +1239,43 @@ static int check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx) + return 0; + } + +-static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr, +- unsigned int wqe_idx) ++static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe, ++ unsigned int max_sge, bool rsv) + { +- struct hns_roce_v2_wqe_data_seg *dseg; +- struct hns_roce_rinl_sge *sge_list; +- int i; ++ struct hns_roce_v2_wqe_data_seg *dseg = wqe; ++ unsigned int i, cnt; + +- dseg = get_recv_wqe_v2(qp, wqe_idx); +- for (i = 0; i < wr->num_sge; i++) { ++ for (i = 0, cnt = 0; i < wr->num_sge; i++) { ++ /* Skip zero-length sge */ + if (!wr->sg_list[i].length) + continue; +- set_data_seg_v2(dseg, wr->sg_list + i); +- dseg++; ++ ++ set_data_seg_v2(dseg + cnt, wr->sg_list + i); ++ cnt++; + } + +- if (qp->rq.rsv_sge) +- set_ending_data_seg(dseg); ++ /* Fill a reserved sge to make ROCEE stop reading remaining segments */ ++ if (rsv) { ++ dseg[cnt].lkey = 0; ++ dseg[cnt].addr = 0; ++ dseg[cnt].len = htole32(INVALID_SGE_LENGTH); ++ } else { ++ /* Clear remaining segments to make ROCEE ignore sges */ ++ if (cnt < max_sge) ++ memset(dseg + cnt, 0, ++ (max_sge - cnt) * HNS_ROCE_SGE_SIZE); ++ } ++} ++ ++static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr, ++ unsigned int wqe_idx, unsigned int max_sge) ++{ ++ struct hns_roce_rinl_sge *sge_list; ++ unsigned int i; ++ void *wqe; ++ ++ wqe = get_recv_wqe_v2(qp, wqe_idx); ++ fill_recv_sge_to_wqe(wr, wqe, max_sge, qp->rq.rsv_sge); + + if (!qp->rq_rinl_buf.wqe_cnt) + return; +@@ -1310,7 +1322,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + } + + wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1); +- fill_rq_wqe(qp, wr, wqe_idx); ++ fill_rq_wqe(qp, wr, wqe_idx, max_sge); + qp->rq.wrid[wqe_idx] = wr->wr_id; + } + +@@ -1536,10 +1548,8 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq) + } + + static int check_post_srq_valid(struct hns_roce_srq *srq, +- struct ibv_recv_wr *wr) ++ struct ibv_recv_wr *wr, unsigned int max_sge) + { +- unsigned int max_sge = srq->max_gs - srq->rsv_sge; +- + if (hns_roce_v2_srqwq_overflow(srq)) + return -ENOMEM; + +@@ -1575,28 +1585,6 @@ static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx) + return 0; + } + +-static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx, +- struct ibv_recv_wr *wr) +-{ +- struct hns_roce_v2_wqe_data_seg *dseg; +- int i; +- +- dseg = get_srq_wqe(srq, wqe_idx); +- +- for (i 
= 0; i < wr->num_sge; ++i) { +- dseg[i].len = htole32(wr->sg_list[i].length); +- dseg[i].lkey = htole32(wr->sg_list[i].lkey); +- dseg[i].addr = htole64(wr->sg_list[i].addr); +- } +- +- /* hw stop reading when identify the last one */ +- if (srq->rsv_sge) { +- dseg[i].len = htole32(INVALID_SGE_LENGTH); +- dseg[i].lkey = htole32(0x0); +- dseg[i].addr = 0; +- } +-} +- + static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) + { + struct hns_roce_idx_que *idx_que = &srq->idx_que; +@@ -1624,15 +1612,16 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + { + struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context); + struct hns_roce_srq *srq = to_hr_srq(ib_srq); ++ unsigned int wqe_idx, max_sge, nreq; + struct hns_roce_db srq_db; +- unsigned int wqe_idx; + int ret = 0; +- int nreq; ++ void *wqe; + + pthread_spin_lock(&srq->lock); + ++ max_sge = srq->max_gs - srq->rsv_sge; + for (nreq = 0; wr; ++nreq, wr = wr->next) { +- ret = check_post_srq_valid(srq, wr); ++ ret = check_post_srq_valid(srq, wr, max_sge); + if (ret) { + *bad_wr = wr; + break; +@@ -1644,7 +1633,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + break; + } + +- fill_srq_wqe(srq, wqe_idx, wr); ++ wqe = get_srq_wqe(srq, wqe_idx); ++ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); + fill_wqe_idx(srq, wqe_idx); + + srq->wrid[wqe_idx] = wr->wr_id; +-- +2.27.0 + diff --git a/rdma-core.spec b/rdma-core.spec index a70578d..328e86d 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 35.1 -Release: 4 +Release: 5 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url: https://github.com/linux-rdma/rdma-core @@ -31,6 +31,21 @@ Patch21: 0021-libhns-Fix-wrong-type-of-variables-and-fields.patch Patch22: 0022-libhns-Fix-wrong-print-format-for-unsigned-type.patch Patch23: 0023-libhns-Remove-redundant-variable-initialization.patch Patch24: 0024-libhns-Remove-unused-macros.patch +Patch25: 0025-libhns-Refactor-the-poll-one-interface.patch +Patch26: 0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch +Patch27: 0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch +Patch28: 0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch +Patch29: 0029-libhns-Add-vendor_err-information-for-error-WC.patch +Patch30: 0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch +Patch31: 0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch +Patch32: 0032-util-Fix-mmio-memcpy-on-ARM.patch +Patch33: 0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch +Patch34: 0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch +Patch35: 0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch +Patch36: 0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch +Patch37: 0037-libhns-Fix-wrong-HIP08-version-macro.patch +Patch38: 0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch +Patch39: 0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -275,6 +290,12 @@ fi %{_mandir}/* %changelog +* Fri Aug 12 2022 luozhengfeng - 35.1-5 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Backport bugfix and refactor patches for hns from v39 + * Wed Aug 03 2022 luozhengfeng - 35.1-4 - Type: enhancement - ID: NA