rdma-core/0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch
zhengfeng luo 910edf6453 Support CQ_EX for hns
Add support for hns CQ_EX; all of its dependent patches are also included.

Signed-off-by: zhengfeng luo <luozhengfeng@h-partners.com>
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
2022-08-29 21:05:59 +08:00
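
For context, a minimal usage sketch (illustrative only, not part of this repository) of the extended CQ interface that this series enables for hns. It assumes the CQ was created with ibv_create_cq_ex() and that fields such as byte_len and opcode were requested via wc_flags at creation time; per-entry accessors are attached by the provider (see hns_roce_attach_cq_ex_ops() in the patch below).

#include <errno.h>
#include <infiniband/verbs.h>

/* Illustrative only: drain an extended CQ created with ibv_create_cq_ex().
 * Fields beyond wr_id/status are read through the ibv_wc_read_*() accessors.
 */
static int drain_cq_ex(struct ibv_cq_ex *cq)
{
	struct ibv_poll_cq_attr poll_attr = {};
	int ret = ibv_start_poll(cq, &poll_attr);

	if (ret == ENOENT)
		return 0;		/* CQ is empty */
	if (ret)
		return ret;		/* hard error */

	do {
		if (cq->status == IBV_WC_SUCCESS) {
			(void)ibv_wc_read_opcode(cq);
			(void)ibv_wc_read_byte_len(cq);
		}
		ret = ibv_next_poll(cq);
	} while (!ret);

	ibv_end_poll(cq);
	return ret == ENOENT ? 0 : ret;
}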

From 36446a56eea5db54e229207bf39c796df16f519a Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Mon, 21 Mar 2022 09:32:04 +0800
Subject: libhns: Extended QP supports the new post send mechanism

OFED provides a new set of post-send APIs for the extended QP. With the new
APIs, users can post send WRs more efficiently. The hns driver adds support
for these APIs.

Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
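
As a point of reference (not part of this patch), a minimal sketch of how an application drives the post-send path implemented here. It assumes the QP was created through ibv_create_qp_ex() with IBV_QP_INIT_ATTR_SEND_OPS_FLAGS set and IBV_QP_EX_WITH_RDMA_WRITE in send_ops_flags; the keys, buffer and lengths are caller-supplied placeholders.

#include <stdint.h>
#include <infiniband/verbs.h>

/* Illustrative helper only: post a single signaled RDMA write through the
 * extended work-request API wired up below (wr_start / wr_rdma_write /
 * wr_set_sge_rc / wr_complete).
 */
static int post_one_rdma_write(struct ibv_qp *qp, uint32_t lkey, void *buf,
			       uint32_t len, uint32_t rkey, uint64_t raddr)
{
	struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);

	ibv_wr_start(qpx);			/* -> wr_start(): takes the SQ lock */

	qpx->wr_id = 1;
	qpx->wr_flags = IBV_SEND_SIGNALED;
	ibv_wr_rdma_write(qpx, rkey, raddr);	/* -> wr_rdma_write() */
	ibv_wr_set_sge(qpx, lkey, (uintptr_t)buf, len);	/* -> wr_set_sge_rc() */

	/* -> wr_complete(): rings the doorbell (or writes a direct WQE) and
	 * releases the SQ lock; on a queued error the SQ head is rolled back.
	 */
	return ibv_wr_complete(qpx);
}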
---
providers/hns/hns_roce_u.h | 6 +
providers/hns/hns_roce_u_hw_v2.c | 814 +++++++++++++++++++++++++++++--
providers/hns/hns_roce_u_hw_v2.h | 7 +
providers/hns/hns_roce_u_verbs.c | 11 +-
4 files changed, 792 insertions(+), 46 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 171fe06e..96059172 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -338,6 +338,12 @@ struct hns_roce_qp {
unsigned long flags;
int refcnt; /* specially used for XRC */
void *dwqe_page;
+
+ /* specific fields for the new post send APIs */
+ int err;
+ void *cur_wqe;
+ unsigned int rb_sq_head; /* roll back sq head */
+ struct hns_roce_sge_info sge_info;
};
struct hns_roce_av {
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index fab1939b..0169250d 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -78,7 +78,7 @@ static inline unsigned int mtu_enum_to_int(enum ibv_mtu mtu)
static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n);
static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
- struct ibv_sge *sg)
+ const struct ibv_sge *sg)
{
dseg->lkey = htole32(sg->lkey);
dseg->addr = htole64(sg->addr);
@@ -824,9 +824,28 @@ static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg,
sge_info->total_len = len;
}
+static void get_src_buf_info(void **src_addr, uint32_t *src_len,
+ const void *buf_list, int buf_idx,
+ enum hns_roce_wr_buf_type type)
+{
+ if (type == WR_BUF_TYPE_POST_SEND) {
+ const struct ibv_sge *sg_list = buf_list;
+
+ *src_addr = (void *)(uintptr_t)sg_list[buf_idx].addr;
+ *src_len = sg_list[buf_idx].length;
+ } else {
+ const struct ibv_data_buf *bf_list = buf_list;
+
+ *src_addr = bf_list[buf_idx].addr;
+ *src_len = bf_list[buf_idx].length;
+ }
+}
+
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
- const struct ibv_send_wr *wr,
- struct hns_roce_sge_info *sge_info)
+ struct hns_roce_sge_info *sge_info,
+ const void *buf_list,
+ uint32_t num_buf,
+ enum hns_roce_wr_buf_type buf_type)
{
unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg);
unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
@@ -834,18 +853,15 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
uint32_t src_len, tail_len;
int i;
-
if (sge_info->total_len > qp->sq.max_gs * sge_sz)
return EINVAL;
dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask);
- for (i = 0; i < wr->num_sge; i++) {
+ for (i = 0; i < num_buf; i++) {
tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr;
-
- src_addr = (void *)(uintptr_t)wr->sg_list[i].addr;
- src_len = wr->sg_list[i].length;
+ get_src_buf_info(&src_addr, &src_len, buf_list, i, buf_type);
if (src_len < tail_len) {
memcpy(dst_addr, src_addr, src_len);
@@ -870,20 +886,11 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
return 0;
}
-static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr,
- struct hns_roce_ud_sq_wqe *ud_sq_wqe)
+static void set_ud_inl_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
+ uint8_t *data)
{
- uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {0};
uint32_t *loc = (uint32_t *)data;
uint32_t tmp_data;
- void *tmp = data;
- int i;
-
- for (i = 0; i < wr->num_sge; i++) {
- memcpy(tmp, (void *)(uintptr_t)wr->sg_list[i].addr,
- wr->sg_list[i].length);
- tmp += wr->sg_list[i].length;
- }
hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_15_0, *loc & 0xffff);
hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_23_16, (*loc >> 16) & 0xff);
@@ -896,6 +903,22 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr,
hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_63_48, *loc >> 16);
}
+static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr,
+ struct hns_roce_ud_sq_wqe *ud_sq_wqe)
+{
+ uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {};
+ void *tmp = data;
+ int i;
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(tmp, (void *)(uintptr_t)wr->sg_list[i].addr,
+ wr->sg_list[i].length);
+ tmp += wr->sg_list[i].length;
+ }
+
+ set_ud_inl_seg(ud_sq_wqe, data);
+}
+
static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
{
int mtu = mtu_enum_to_int(qp->path_mtu);
@@ -919,7 +942,9 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
} else {
hr_reg_enable(ud_sq_wqe, UDWQE_INLINE_TYPE);
- ret = fill_ext_sge_inl_data(qp, wr, sge_info);
+ ret = fill_ext_sge_inl_data(qp, sge_info,
+ wr->sg_list, wr->num_sge,
+ WR_BUF_TYPE_POST_SEND);
if (ret)
return ret;
@@ -995,6 +1020,23 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
return ret;
}
+static inline void enable_wqe(struct hns_roce_qp *qp, void *sq_wqe,
+ unsigned int index)
+{
+ struct hns_roce_rc_sq_wqe *wqe = sq_wqe;
+
+ /*
+ * The pipeline can sequentially post all valid WQEs in wq buf,
+ * including those new WQEs waiting for doorbell to update the PI again.
+ * Therefore, the valid bit of WQE MUST be updated after all of fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ udma_to_device_barrier();
+
+ hr_reg_write_bool(wqe, RCWQE_OWNER, !(index & BIT(qp->sq.shift)));
+}
+
static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
unsigned int nreq, struct hns_roce_sge_info *sge_info)
{
@@ -1026,17 +1068,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
if (ret)
return ret;
- /*
- * The pipeline can sequentially post all valid WQEs in wq buf,
- * including those new WQEs waiting for doorbell to update the PI again.
- * Therefore, the valid bit of WQE MUST be updated after all of fields
- * and extSGEs have been written into DDR instead of cache.
- */
- if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB)
- udma_to_device_barrier();
-
- hr_reg_write_bool(wqe, RCWQE_OWNER,
- !((qp->sq.head + nreq) & BIT(qp->sq.shift)));
+ enable_wqe(qp, ud_sq_wqe, qp->sq.head + nreq);
return ret;
}
@@ -1068,7 +1100,9 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
} else {
hr_reg_enable(rc_sq_wqe, RCWQE_INLINE_TYPE);
- ret = fill_ext_sge_inl_data(qp, wr, sge_info);
+ ret = fill_ext_sge_inl_data(qp, sge_info,
+ wr->sg_list, wr->num_sge,
+ WR_BUF_TYPE_POST_SEND);
if (ret)
return ret;
@@ -1189,17 +1223,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
return ret;
wqe_valid:
- /*
- * The pipeline can sequentially post all valid WQEs into WQ buffer,
- * including new WQEs waiting for the doorbell to update the PI again.
- * Therefore, the owner bit of WQE MUST be updated after all fields
- * and extSGEs have been written into DDR instead of cache.
- */
- if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB)
- udma_to_device_barrier();
-
- hr_reg_write_bool(wqe, RCWQE_OWNER,
- !((qp->sq.head + nreq) & BIT(qp->sq.shift)));
+ enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
return 0;
}
@@ -1921,6 +1945,710 @@ void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags)
cq_ex->read_cvlan = wc_read_cvlan;
}
+static struct hns_roce_rc_sq_wqe *
+init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
+{
+ unsigned int send_flags = qp->verbs_qp.qp_ex.wr_flags;
+ struct hns_roce_rc_sq_wqe *wqe;
+ unsigned int wqe_idx;
+
+ if (hns_roce_v2_wq_overflow(&qp->sq, 0,
+ to_hr_cq(qp->verbs_qp.qp.send_cq))) {
+ qp->cur_wqe = NULL;
+ qp->err = ENOMEM;
+ return NULL;
+ }
+
+ wqe_idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
+ wqe = get_send_wqe(qp, wqe_idx);
+
+ hr_reg_write(wqe, RCWQE_OPCODE, opcode);
+ hr_reg_write_bool(wqe, RCWQE_CQE, send_flags & IBV_SEND_SIGNALED);
+ hr_reg_write_bool(wqe, RCWQE_FENCE, send_flags & IBV_SEND_FENCE);
+ hr_reg_write_bool(wqe, RCWQE_SE, send_flags & IBV_SEND_SOLICITED);
+ hr_reg_clear(wqe, RCWQE_INLINE);
+ hr_reg_clear(wqe, RCWQE_SO);
+
+ qp->sq.wrid[wqe_idx] = wr_id;
+ qp->cur_wqe = wqe;
+ qp->sq.head++;
+
+ return wqe;
+}
+
+static void wr_set_sge_rc(struct ibv_qp_ex *ibv_qp, uint32_t lkey,
+ uint64_t addr, uint32_t length)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
+
+ if (!wqe)
+ return;
+
+ hr_reg_write(wqe, RCWQE_LKEY0, lkey);
+ hr_reg_write(wqe, RCWQE_VA0_L, addr);
+ hr_reg_write(wqe, RCWQE_VA0_H, addr >> 32);
+
+ wqe->msg_len = htole32(length);
+ hr_reg_write(wqe, RCWQE_LEN0, length);
+ hr_reg_write(wqe, RCWQE_SGE_NUM, !!length);
+ /* ignore ex sge start index */
+
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct hns_roce_qp *qp, const struct ibv_sge *sge,
+ size_t num_sge)
+{
+ unsigned int index = qp->sge_info.start_idx;
+ unsigned int mask = qp->ex_sge.sge_cnt - 1;
+ unsigned int msg_len = 0;
+ unsigned int cnt = 0;
+ int i;
+
+ for (i = 0; i < num_sge; i++) {
+ if (!sge[i].length)
+ continue;
+
+ msg_len += sge[i].length;
+ cnt++;
+
+ if (cnt <= HNS_ROCE_SGE_IN_WQE) {
+ set_data_seg_v2(dseg, &sge[i]);
+ dseg++;
+ } else {
+ dseg = get_send_sge_ex(qp, index & mask);
+ set_data_seg_v2(dseg, &sge[i]);
+ index++;
+ }
+ }
+
+ qp->sge_info.start_idx = index;
+ qp->sge_info.valid_num = cnt;
+ qp->sge_info.total_len = msg_len;
+}
+
+static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
+ const struct ibv_sge *sg_list)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
+ struct hns_roce_v2_wqe_data_seg *dseg;
+
+ if (!wqe)
+ return;
+
+ if (num_sge > qp->sq.max_gs) {
+ qp->err = EINVAL;
+ return;
+ }
+
+ hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX,
+ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1));
+
+ dseg = (void *)(wqe + 1);
+ set_sgl_rc(dseg, qp, sg_list, num_sge);
+
+ wqe->msg_len = htole32(qp->sge_info.total_len);
+ hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num);
+
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void wr_send_rc(struct ibv_qp_ex *ibv_qp)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+
+ init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND);
+}
+
+static void wr_send_imm_rc(struct ibv_qp_ex *ibv_qp, __be32 imm_data)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe;
+
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_IMM);
+ if (!wqe)
+ return;
+
+ wqe->immtdata = htole32(be32toh(imm_data));
+}
+
+static void wr_send_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe;
+
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_INV);
+ if (!wqe)
+ return;
+
+ wqe->inv_key = htole32(invalidate_rkey);
+}
+
+static void wr_local_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe;
+
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_LOCAL_INV);
+ if (!wqe)
+ return;
+
+ hr_reg_enable(wqe, RCWQE_SO);
+ wqe->inv_key = htole32(invalidate_rkey);
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void wr_set_xrc_srqn(struct ibv_qp_ex *ibv_qp, uint32_t remote_srqn)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
+
+ if (!wqe)
+ return;
+
+ hr_reg_write(wqe, RCWQE_XRC_SRQN, remote_srqn);
+}
+
+static void wr_rdma_read(struct ibv_qp_ex *ibv_qp, uint32_t rkey,
+ uint64_t remote_addr)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe;
+
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_RDMA_READ);
+ if (!wqe)
+ return;
+
+ wqe->va = htole64(remote_addr);
+ wqe->rkey = htole32(rkey);
+}
+
+static void wr_rdma_write(struct ibv_qp_ex *ibv_qp, uint32_t rkey,
+ uint64_t remote_addr)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe;
+
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_RDMA_WRITE);
+ if (!wqe)
+ return;
+
+ wqe->va = htole64(remote_addr);
+ wqe->rkey = htole32(rkey);
+}
+
+static void wr_rdma_write_imm(struct ibv_qp_ex *ibv_qp, uint32_t rkey,
+ uint64_t remote_addr, __be32 imm_data)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe;
+
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id,
+ HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM);
+ if (!wqe)
+ return;
+
+ wqe->va = htole64(remote_addr);
+ wqe->rkey = htole32(rkey);
+ wqe->immtdata = htole32(be32toh(imm_data));
+}
+
+static void set_wr_atomic(struct ibv_qp_ex *ibv_qp, uint32_t rkey,
+ uint64_t remote_addr, uint64_t compare_add,
+ uint64_t swap, uint32_t opcode)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct hns_roce_wqe_atomic_seg *aseg;
+ struct hns_roce_rc_sq_wqe *wqe;
+
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id, opcode);
+ if (!wqe)
+ return;
+
+ wqe->va = htole64(remote_addr);
+ wqe->rkey = htole32(rkey);
+
+ dseg = (void *)(wqe + 1);
+ aseg = (void *)(dseg + 1);
+
+ if (opcode == HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP) {
+ aseg->fetchadd_swap_data = htole64(swap);
+ aseg->cmp_data = htole64(compare_add);
+ } else {
+ aseg->fetchadd_swap_data = htole64(compare_add);
+ aseg->cmp_data = 0;
+ }
+}
+
+static void wr_atomic_cmp_swp(struct ibv_qp_ex *ibv_qp, uint32_t rkey,
+ uint64_t remote_addr, uint64_t compare,
+ uint64_t swap)
+{
+ set_wr_atomic(ibv_qp, rkey, remote_addr, compare, swap,
+ HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP);
+}
+
+static void wr_atomic_fetch_add(struct ibv_qp_ex *ibv_qp, uint32_t rkey,
+ uint64_t remote_addr, uint64_t add)
+{
+ set_wr_atomic(ibv_qp, rkey, remote_addr, add, 0,
+ HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD);
+}
+
+static void set_inline_data_list_rc(struct hns_roce_qp *qp,
+ struct hns_roce_rc_sq_wqe *wqe,
+ size_t num_buf,
+ const struct ibv_data_buf *buf_list)
+{
+ unsigned int msg_len = qp->sge_info.total_len;
+ void *dseg;
+ int ret;
+ int i;
+
+ hr_reg_enable(wqe, RCWQE_INLINE);
+
+ wqe->msg_len = htole32(msg_len);
+ if (msg_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) {
+ hr_reg_clear(wqe, RCWQE_INLINE_TYPE);
+ /* ignore ex sge start index */
+
+ dseg = wqe + 1;
+ for (i = 0; i < num_buf; i++) {
+ memcpy(dseg, buf_list[i].addr, buf_list[i].length);
+ dseg += buf_list[i].length;
+ }
+ /* ignore sge num */
+ } else {
+ if (!check_inl_data_len(qp, msg_len)) {
+ qp->err = EINVAL;
+ return;
+ }
+
+ hr_reg_enable(wqe, RCWQE_INLINE_TYPE);
+ hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX,
+ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1));
+
+ ret = fill_ext_sge_inl_data(qp, &qp->sge_info,
+ buf_list, num_buf,
+ WR_BUF_TYPE_SEND_WR_OPS);
+ if (ret) {
+ qp->err = EINVAL;
+ return;
+ }
+
+ hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num);
+ }
+}
+
+static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr,
+ size_t length)
+{
+ struct ibv_data_buf buff = { .addr = addr, .length = length };
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
+
+ if (!wqe)
+ return;
+
+ buff.addr = addr;
+ buff.length = length;
+
+ qp->sge_info.total_len = length;
+ set_inline_data_list_rc(qp, wqe, 1, &buff);
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ const struct ibv_data_buf *buf_list)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
+ int i;
+
+ if (!wqe)
+ return;
+
+ qp->sge_info.total_len = 0;
+ for (i = 0; i < num_buf; i++)
+ qp->sge_info.total_len += buf_list[i].length;
+
+ set_inline_data_list_rc(qp, wqe, num_buf, buf_list);
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static struct hns_roce_ud_sq_wqe *
+init_ud_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
+{
+ unsigned int send_flags = qp->verbs_qp.qp_ex.wr_flags;
+ struct hns_roce_ud_sq_wqe *wqe;
+ unsigned int wqe_idx;
+
+ if (hns_roce_v2_wq_overflow(&qp->sq, 0,
+ to_hr_cq(qp->verbs_qp.qp.send_cq))) {
+ qp->cur_wqe = NULL;
+ qp->err = ENOMEM;
+ return NULL;
+ }
+
+ wqe_idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
+ wqe = get_send_wqe(qp, wqe_idx);
+
+ hr_reg_write(wqe, UDWQE_OPCODE, opcode);
+ hr_reg_write_bool(wqe, UDWQE_CQE, send_flags & IBV_SEND_SIGNALED);
+ hr_reg_write_bool(wqe, UDWQE_SE, send_flags & IBV_SEND_SOLICITED);
+ hr_reg_clear(wqe, UDWQE_INLINE);
+
+ qp->sq.wrid[wqe_idx] = wr_id;
+ qp->cur_wqe = wqe;
+ qp->sq.head++;
+
+ return wqe;
+}
+
+static void wr_send_ud(struct ibv_qp_ex *ibv_qp)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+
+ init_ud_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND);
+}
+
+static void wr_send_imm_ud(struct ibv_qp_ex *ibv_qp, __be32 imm_data)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_ud_sq_wqe *wqe;
+
+ wqe = init_ud_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_SEND_WITH_IMM);
+ if (!wqe)
+ return;
+
+ wqe->immtdata = htole32(be32toh(imm_data));
+}
+
+static void wr_set_ud_addr(struct ibv_qp_ex *ibv_qp, struct ibv_ah *ah,
+ uint32_t remote_qpn, uint32_t remote_qkey)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
+ struct hns_roce_ah *hr_ah = to_hr_ah(ah);
+ int ret;
+
+ if (!wqe)
+ return;
+
+ wqe->qkey = htole32(remote_qkey & 0x80000000 ? qp->qkey : remote_qkey);
+
+ hr_reg_write(wqe, UDWQE_DQPN, remote_qpn);
+
+ ret = fill_ud_av(wqe, hr_ah);
+ if (ret)
+ qp->err = ret;
+
+ qp->sl = hr_ah->av.sl;
+}
+
+static void wr_set_sge_ud(struct ibv_qp_ex *ibv_qp, uint32_t lkey,
+ uint64_t addr, uint32_t length)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ int sge_idx;
+
+ if (!wqe)
+ return;
+
+ wqe->msg_len = htole32(length);
+ hr_reg_write(wqe, UDWQE_SGE_NUM, 1);
+ sge_idx = qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1);
+ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx);
+
+ dseg = get_send_sge_ex(qp, sge_idx);
+
+ dseg->lkey = htole32(lkey);
+ dseg->addr = htole64(addr);
+ dseg->len = htole32(length);
+
+ qp->sge_info.start_idx++;
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
+ const struct ibv_sge *sg_list)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ unsigned int sge_idx = qp->sge_info.start_idx;
+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
+ unsigned int mask = qp->ex_sge.sge_cnt - 1;
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ unsigned int msg_len = 0;
+ unsigned int cnt = 0;
+
+ if (!wqe)
+ return;
+
+ if (num_sge > qp->sq.max_gs) {
+ qp->err = EINVAL;
+ return;
+ }
+
+ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask);
+ for (int i = 0; i < num_sge; i++) {
+ if (!sg_list[i].length)
+ continue;
+
+ dseg = get_send_sge_ex(qp, sge_idx & mask);
+ set_data_seg_v2(dseg, &sg_list[i]);
+
+ msg_len += sg_list[i].length;
+ cnt++;
+ sge_idx++;
+ }
+
+ wqe->msg_len = htole32(msg_len);
+ hr_reg_write(wqe, UDWQE_SGE_NUM, cnt);
+
+ qp->sge_info.start_idx += cnt;
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void set_inline_data_list_ud(struct hns_roce_qp *qp,
+ struct hns_roce_ud_sq_wqe *wqe,
+ size_t num_buf,
+ const struct ibv_data_buf *buf_list)
+{
+ uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {};
+ unsigned int msg_len = qp->sge_info.total_len;
+ void *tmp;
+ int ret;
+ int i;
+
+ if (!check_inl_data_len(qp, msg_len)) {
+ qp->err = EINVAL;
+ return;
+ }
+
+ hr_reg_enable(wqe, UDWQE_INLINE);
+
+ wqe->msg_len = htole32(msg_len);
+ if (msg_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) {
+ hr_reg_clear(wqe, UDWQE_INLINE_TYPE);
+ /* ignore ex sge start index */
+
+ tmp = data;
+ for (i = 0; i < num_buf; i++) {
+ memcpy(tmp, buf_list[i].addr, buf_list[i].length);
+ tmp += buf_list[i].length;
+ }
+
+ set_ud_inl_seg(wqe, data);
+ /* ignore sge num */
+ } else {
+ hr_reg_enable(wqe, UDWQE_INLINE_TYPE);
+ hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX,
+ qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1));
+
+ ret = fill_ext_sge_inl_data(qp, &qp->sge_info,
+ buf_list, num_buf,
+ WR_BUF_TYPE_SEND_WR_OPS);
+ if (ret) {
+ qp->err = EINVAL;
+ return;
+ }
+
+ hr_reg_write(wqe, UDWQE_SGE_NUM, qp->sge_info.valid_num);
+ }
+}
+
+static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr,
+ size_t length)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
+ struct ibv_data_buf buff;
+
+ if (!wqe)
+ return;
+
+ buff.addr = addr;
+ buff.length = length;
+
+ qp->sge_info.total_len = length;
+ set_inline_data_list_ud(qp, wqe, 1, &buff);
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
+ const struct ibv_data_buf *buf_list)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
+ int i;
+
+ if (!wqe)
+ return;
+
+ qp->sge_info.total_len = 0;
+ for (i = 0; i < num_buf; i++)
+ qp->sge_info.total_len += buf_list[i].length;
+
+ set_inline_data_list_ud(qp, wqe, num_buf, buf_list);
+ enable_wqe(qp, wqe, qp->sq.head);
+}
+
+static void wr_start(struct ibv_qp_ex *ibv_qp)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ enum ibv_qp_state state = ibv_qp->qp_base.state;
+
+ if (state == IBV_QPS_RESET ||
+ state == IBV_QPS_INIT ||
+ state == IBV_QPS_RTR) {
+ qp->err = EINVAL;
+ return;
+ }
+
+ pthread_spin_lock(&qp->sq.lock);
+ qp->sge_info.start_idx = qp->next_sge;
+ qp->rb_sq_head = qp->sq.head;
+ qp->err = 0;
+}
+
+static int wr_complete(struct ibv_qp_ex *ibv_qp)
+{
+ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+ unsigned int nreq = qp->sq.head - qp->rb_sq_head;
+ struct ibv_qp_attr attr;
+ int err = qp->err;
+
+ if (err) {
+ qp->sq.head = qp->rb_sq_head;
+ goto out;
+ }
+
+ if (nreq) {
+ qp->next_sge = qp->sge_info.start_idx;
+ udma_to_device_barrier();
+
+ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ hns_roce_write_dwqe(qp, qp->cur_wqe);
+ else
+ hns_roce_update_sq_db(ctx, qp);
+
+ if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ *(qp->sdb) = qp->sq.head & 0xffff;
+ }
+
+out:
+ pthread_spin_unlock(&qp->sq.lock);
+ if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
+ attr.qp_state = IBV_QPS_ERR;
+ hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
+ }
+
+ return err;
+}
+
+static void wr_abort(struct ibv_qp_ex *ibv_qp)
+{
+ struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
+
+ qp->sq.head = qp->rb_sq_head;
+
+ pthread_spin_unlock(&qp->sq.lock);
+}
+
+enum {
+ HNS_SUPPORTED_SEND_OPS_FLAGS_RC_XRC =
+ IBV_QP_EX_WITH_SEND |
+ IBV_QP_EX_WITH_SEND_WITH_INV |
+ IBV_QP_EX_WITH_SEND_WITH_IMM |
+ IBV_QP_EX_WITH_RDMA_WRITE |
+ IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
+ IBV_QP_EX_WITH_RDMA_READ |
+ IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
+ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD |
+ IBV_QP_EX_WITH_LOCAL_INV,
+ HNS_SUPPORTED_SEND_OPS_FLAGS_UD =
+ IBV_QP_EX_WITH_SEND |
+ IBV_QP_EX_WITH_SEND_WITH_IMM,
+};
+
+static void fill_send_wr_ops_rc_xrc(struct ibv_qp_ex *qp_ex)
+{
+ qp_ex->wr_send = wr_send_rc;
+ qp_ex->wr_send_imm = wr_send_imm_rc;
+ qp_ex->wr_send_inv = wr_send_inv_rc;
+ qp_ex->wr_rdma_read = wr_rdma_read;
+ qp_ex->wr_rdma_write = wr_rdma_write;
+ qp_ex->wr_rdma_write_imm = wr_rdma_write_imm;
+ qp_ex->wr_set_inline_data = wr_set_inline_data_rc;
+ qp_ex->wr_set_inline_data_list = wr_set_inline_data_list_rc;
+ qp_ex->wr_local_inv = wr_local_inv_rc;
+ qp_ex->wr_atomic_cmp_swp = wr_atomic_cmp_swp;
+ qp_ex->wr_atomic_fetch_add = wr_atomic_fetch_add;
+ qp_ex->wr_set_sge = wr_set_sge_rc;
+ qp_ex->wr_set_sge_list = wr_set_sge_list_rc;
+}
+
+static void fill_send_wr_ops_ud(struct ibv_qp_ex *qp_ex)
+{
+ qp_ex->wr_send = wr_send_ud;
+ qp_ex->wr_send_imm = wr_send_imm_ud;
+ qp_ex->wr_set_ud_addr = wr_set_ud_addr;
+ qp_ex->wr_set_inline_data = wr_set_inline_data_ud;
+ qp_ex->wr_set_inline_data_list = wr_set_inline_data_list_ud;
+ qp_ex->wr_set_sge = wr_set_sge_ud;
+ qp_ex->wr_set_sge_list = wr_set_sge_list_ud;
+}
+
+static int fill_send_wr_ops(const struct ibv_qp_init_attr_ex *attr,
+ struct ibv_qp_ex *qp_ex)
+{
+ uint64_t ops = attr->send_ops_flags;
+
+ qp_ex->wr_start = wr_start;
+ qp_ex->wr_complete = wr_complete;
+ qp_ex->wr_abort = wr_abort;
+
+ switch (attr->qp_type) {
+ case IBV_QPT_XRC_SEND:
+ qp_ex->wr_set_xrc_srqn = wr_set_xrc_srqn;
+ SWITCH_FALLTHROUGH;
+ case IBV_QPT_RC:
+ if (ops & ~HNS_SUPPORTED_SEND_OPS_FLAGS_RC_XRC)
+ return -EOPNOTSUPP;
+ fill_send_wr_ops_rc_xrc(qp_ex);
+ break;
+ case IBV_QPT_UD:
+ if (ops & ~HNS_SUPPORTED_SEND_OPS_FLAGS_UD)
+ return -EOPNOTSUPP;
+ fill_send_wr_ops_ud(qp_ex);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp)
+{
+ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
+ if (fill_send_wr_ops(attr, &qp->verbs_qp.qp_ex))
+ return -EOPNOTSUPP;
+
+ qp->verbs_qp.comp_mask |= VERBS_QP_EX;
+ }
+
+ return 0;
+}
+
const struct hns_roce_u_hw hns_roce_u_hw_v2 = {
.hw_version = HNS_ROCE_HW_VER2,
.hw_ops = {
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 122fdbdf..098dbdf4 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -122,6 +122,11 @@ enum {
HNS_ROCE_V2_CQ_DB_NTR,
};
+enum hns_roce_wr_buf_type {
+ WR_BUF_TYPE_POST_SEND,
+ WR_BUF_TYPE_SEND_WR_OPS,
+};
+
struct hns_roce_db {
__le32 byte_4;
__le32 parameter;
@@ -339,5 +344,7 @@ struct hns_roce_ud_sq_wqe {
void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags);
+int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp);
#endif /* _HNS_ROCE_U_HW_V2_H */
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 9ea8a6d3..1457a1a2 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -768,7 +768,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
}
enum {
- CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD,
+ CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD |
+ IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
};
static int check_qp_create_mask(struct hns_roce_context *ctx,
@@ -1270,9 +1271,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
if (ret)
goto err_cmd;
+ ret = hns_roce_attach_qp_ex_ops(attr, qp);
+ if (ret)
+ goto err_ops;
+
ret = hns_roce_store_qp(context, qp);
if (ret)
- goto err_store;
+ goto err_ops;
if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key);
@@ -1286,7 +1291,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
err_dwqe:
hns_roce_v2_clear_qp(context, qp);
-err_store:
+err_ops:
ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
err_cmd:
hns_roce_free_qp_buf(qp, context);
--
2.30.0