!43 Update to 41.0

From: @hellotcc 
Reviewed-by: @li-yangyang20 
Signed-off-by: @li-yangyang20
openeuler-ci-bot 2022-09-27 09:04:23 +00:00 committed by Gitee
commit 5e0943666e
51 changed files with 8 additions and 8872 deletions

@@ -1,41 +0,0 @@
From 693d55e80976217215844258e5b78bc115382689 Mon Sep 17 00:00:00 2001
From: Guofeng Yue <yueguofeng@hisilicon.com>
Date: Mon, 10 Jan 2022 10:44:23 +0800
Subject: [PATCH 1/8] Update kernel headers
To commit 62c4d8878d13 ("RDMA/hns: Remove support for HIP06").
Signed-off-by: Guofeng Yue <yueguofeng@hisilicon.com>
---
kernel-headers/rdma/hns-abi.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 42b17765..abfd36e2 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -77,17 +77,19 @@ enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+ HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
};
struct hns_roce_ib_create_qp_resp {
__aligned_u64 cap_flags;
+ __aligned_u64 dwqe_mmap_key;
};
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
- __u32 srq_tab_size;
- __u32 reserved;
+ __u32 srq_tab_size;
+ __u32 reserved;
};
struct hns_roce_ib_alloc_pd_resp {
--
2.33.0

@@ -1,120 +0,0 @@
From 08ec3c43bf9710fdf3ca664f7cd63436e67339d7 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:34 +0800
Subject: [PATCH 2/8] libhns: Fix the ownership of the head/tail pointer of SRQ
WQE
The CQEs of an SRQ are not generated in WQE order, so the wqe_idx
corresponding to each idle WQE is placed in a FIFO from which the hardware
is instructed to fetch the corresponding WQE. The WQ of the SRQ therefore
has no concept of head and tail pointers, but the queue of wqe_idx does.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u.h | 4 ++--
providers/hns/hns_roce_u_hw_v2.c | 12 ++++++------
providers/hns/hns_roce_u_verbs.c | 6 +++---
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 8f805dd1..b3f48113 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -205,6 +205,8 @@ struct hns_roce_idx_que {
int entry_shift;
unsigned long *bitmap;
int bitmap_cnt;
+ unsigned int head;
+ unsigned int tail;
};
struct hns_roce_srq {
@@ -217,8 +219,6 @@ struct hns_roce_srq {
unsigned int max_gs;
unsigned int rsv_sge;
unsigned int wqe_shift;
- int head;
- int tail;
unsigned int *db;
unsigned short counter;
struct hns_roce_idx_que idx_que;
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4988943a..f947dbd7 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -262,7 +262,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
bitmap_num = ind / BIT_CNT_PER_LONG;
bit_num = ind % BIT_CNT_PER_LONG;
srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
- srq->tail++;
+ srq->idx_que.tail++;
pthread_spin_unlock(&srq->lock);
}
@@ -1564,7 +1564,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
pthread_spin_lock(&srq->lock);
/* current idx of srqwq */
- ind = srq->head & (srq->wqe_cnt - 1);
+ ind = srq->idx_que.head & (srq->wqe_cnt - 1);
max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
@@ -1574,7 +1574,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
break;
}
- if (srq->head == srq->tail) {
+ if (srq->idx_que.head == srq->idx_que.tail) {
ret = -ENOMEM;
*bad_wr = wr;
break;
@@ -1607,7 +1607,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
}
if (nreq) {
- srq->head += nreq;
+ srq->idx_que.head += nreq;
/*
* Make sure that descriptors are written before
@@ -1617,8 +1617,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S |
srq->srqn);
- srq_db.parameter =
- htole32(srq->head & DB_PARAM_SRQ_PRODUCER_COUNTER_M);
+ srq_db.parameter = htole32(srq->idx_que.head &
+ DB_PARAM_SRQ_PRODUCER_COUNTER_M);
hns_roce_write64((uint32_t *)&srq_db, ctx,
ROCEE_VF_DB_CFG0_OFFSET);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 30ab072a..9b4934b9 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -491,6 +491,9 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
for (i = 0; i < idx_que->bitmap_cnt; ++i)
idx_que->bitmap[i] = ~(0UL);
+ idx_que->head = 0;
+ idx_que->tail = srq->wqe_cnt - 1;
+
return 0;
}
@@ -512,9 +515,6 @@ static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq)
return ENOMEM;
}
- srq->head = 0;
- srq->tail = srq->wqe_cnt - 1;
-
return 0;
}
--
2.33.0
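
As a standalone illustration of the ownership change above, here is a minimal
C sketch (hypothetical, simplified names; not the provider's actual layout) of
an index queue that owns its own head/tail counters, which the SRQ embeds:

#include <pthread.h>

/* Free WQE indexes flow through this FIFO, so the producer counter
 * (advanced by post_srq_recv) and the consumer counter (advanced when
 * a CQE frees a WQE) belong here, not to the SRQ's work queue. */
struct idx_que_sketch {
        unsigned int head;              /* bumped when WRs are posted */
        unsigned int tail;              /* bumped when a CQE frees a WQE */
};

struct srq_sketch {
        pthread_spinlock_t lock;
        struct idx_que_sketch idx_que;  /* head/tail live here now */
};

static void post_wrs(struct srq_sketch *srq, unsigned int nreq)
{
        pthread_spin_lock(&srq->lock);
        srq->idx_que.head += nreq;      /* was srq->head += nreq */
        pthread_spin_unlock(&srq->lock);
}

static void free_one_wqe(struct srq_sketch *srq)
{
        pthread_spin_lock(&srq->lock);
        srq->idx_que.tail++;            /* was srq->tail++ */
        pthread_spin_unlock(&srq->lock);
}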

@@ -1,180 +0,0 @@
From 9cc4c4b8d31b35428859ef626d4428fc393aace4 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Thu, 11 Nov 2021 21:08:35 +0800
Subject: [PATCH 3/8] libhns: Fix wrong data type when writing doorbell
The DB data is a __le32[] value rather than uint32_t[], and the DB register
should be written with little-endian data rather than a host-order uint64_t.
Fixes: 1523fbb1ea8e ("libhns: Add verbs of cq support")
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_db.h | 14 ++++----------
providers/hns/hns_roce_u_hw_v1.c | 17 +++++++++--------
providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++++++-----------
3 files changed, 25 insertions(+), 29 deletions(-)
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
index b44e64d4..13df9b52 100644
--- a/providers/hns/hns_roce_u_db.h
+++ b/providers/hns/hns_roce_u_db.h
@@ -32,23 +32,17 @@
#include <linux/types.h>
+#include <util/mmio.h>
#include "hns_roce_u.h"
#ifndef _HNS_ROCE_U_DB_H
#define _HNS_ROCE_U_DB_H
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[1] << 32 | val[0])
-#elif __BYTE_ORDER == __BIG_ENDIAN
-#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[0] << 32 | val[1])
-#else
-#error __BYTE_ORDER not defined
-#endif
+#define HNS_ROCE_WORD_NUM 2
-static inline void hns_roce_write64(uint32_t val[2],
- struct hns_roce_context *ctx, int offset)
+static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM])
{
- *(volatile uint64_t *) (ctx->uar + offset) = HNS_ROCE_PAIR_TO_64(val);
+ mmio_write64_le(dest, *(__le64 *)val);
}
void *hns_roce_alloc_db(struct hns_roce_context *ctx,
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
index 8f0a71aa..14ee4817 100644
--- a/providers/hns/hns_roce_u_hw_v1.c
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -65,7 +65,7 @@ static void hns_roce_update_rq_head(struct hns_roce_context *ctx,
udma_to_device_barrier();
- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&rq_db);
}
static void hns_roce_update_sq_head(struct hns_roce_context *ctx,
@@ -84,7 +84,7 @@ static void hns_roce_update_sq_head(struct hns_roce_context *ctx,
udma_to_device_barrier();
- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_DB_SQ_L_0_REG);
+ hns_roce_write64(ctx->uar + ROCEE_DB_SQ_L_0_REG, (__le32 *)&sq_db);
}
static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
@@ -102,7 +102,7 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
CQ_DB_U32_4_CONS_IDX_S,
cq->cons_index & ((cq->cq_depth << 1) - 1));
- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db);
}
static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe,
@@ -422,10 +422,11 @@ static int hns_roce_u_v1_poll_cq(struct ibv_cq *ibvcq, int ne,
*/
static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited)
{
- uint32_t ci;
- uint32_t solicited_flag;
- struct hns_roce_cq_db cq_db = {};
+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
+ struct hns_roce_cq_db cq_db = {};
+ uint32_t solicited_flag;
+ uint32_t ci;
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
solicited_flag = solicited ? HNS_ROCE_CQ_DB_REQ_SOL :
@@ -441,8 +442,8 @@ static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited)
roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M,
CQ_DB_U32_4_CONS_IDX_S, ci);
- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context),
- ROCEE_DB_OTHERS_L_0_REG);
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db);
+
return 0;
}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index f947dbd7..efd949f4 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -293,7 +293,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
HNS_ROCE_V2_RQ_DB);
rq_db.parameter = htole32(rq_head);
- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
}
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
sq_db.parameter = htole32(sq_head);
roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl);
- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
}
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
@@ -325,7 +325,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M,
DB_PARAM_CQ_CMD_SN_S, 1);
- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
}
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
@@ -659,11 +659,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
{
- uint32_t ci;
- uint32_t cmd_sn;
- uint32_t solicited_flag;
- struct hns_roce_db cq_db = {};
+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
+ struct hns_roce_db cq_db = {};
+ uint32_t solicited_flag;
+ uint32_t cmd_sn;
+ uint32_t ci;
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK;
@@ -681,8 +682,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
DB_PARAM_CQ_CMD_SN_S, cmd_sn);
roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag);
- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context),
- ROCEE_VF_DB_CFG0_OFFSET);
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
+
return 0;
}
@@ -1620,8 +1621,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
srq_db.parameter = htole32(srq->idx_que.head &
DB_PARAM_SRQ_PRODUCER_COUNTER_M);
- hns_roce_write64((uint32_t *)&srq_db, ctx,
- ROCEE_VF_DB_CFG0_OFFSET);
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ (__le32 *)&srq_db);
}
pthread_spin_unlock(&srq->lock);
--
2.33.0
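
The core of the fix is keeping the doorbell little-endian from construction to
the MMIO store. A hedged sketch of the helper's shape (the real one is
mmio_write64_le() from util/mmio.h; this plain store is only its little-endian
fast path, and the real helper adds the byte swap needed on big-endian hosts):

#include <linux/types.h>

/* The doorbell is two __le32 words laid out in memory exactly as the
 * device expects them.  Reinterpreting the pair as a __le64 byte image
 * avoids assembling a host-order uint64_t by hand, which is what the
 * removed HNS_ROCE_PAIR_TO_64() macro forced every caller to do. */
static inline void db_write64_le(void *dest, __le32 val[2])
{
        *(volatile __le64 *)dest = *(__le64 *)val;
}

Callers then pass the v2 doorbell structs as (__le32 *)&db, as the hunks above
show.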

@@ -1,43 +0,0 @@
From 60d82566fc94b11280be26733bc306e6af3d2697 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 9 Nov 2021 20:40:58 +0800
Subject: [PATCH 4/8] libhns: Remove unsupported QP type
Currently, user space does not support UC type QP.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u_hw_v1.c | 1 -
providers/hns/hns_roce_u_hw_v2.c | 3 +--
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
index 14ee4817..279c9b0f 100644
--- a/providers/hns/hns_roce_u_hw_v1.c
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -532,7 +532,6 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
ctrl->flag |= htole32(ps_opcode);
wqe += sizeof(struct hns_roce_wqe_raddr_seg);
break;
- case IBV_QPT_UC:
case IBV_QPT_UD:
default:
break;
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index efd949f4..c62f74b5 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -460,8 +460,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
struct hns_roce_qp **cur_qp,
struct ibv_wc *wc, uint32_t opcode)
{
- if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC ||
- (*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_UC) &&
+ if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) &&
(opcode == HNS_ROCE_RECV_OP_SEND ||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM ||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) &&
--
2.33.0

@@ -1,67 +0,0 @@
From e460a4208d1821b1477e621ad5a7b72068e844f9 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:32 +0800
Subject: [PATCH 5/8] libhns: Avoid using WQE indexes that exceed the SRQ size
The SRQ WQE index obtained from the bitmap may be greater than the queue
capacity, so a check for that should be added.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index c62f74b5..1169b64b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1527,8 +1527,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
return ret;
}
-static int find_empty_entry(struct hns_roce_idx_que *idx_que)
+static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
int bit_num;
int i;
@@ -1536,12 +1537,20 @@ static int find_empty_entry(struct hns_roce_idx_que *idx_que)
for (i = 0; i < idx_que->bitmap_cnt && idx_que->bitmap[i] == 0; ++i)
;
if (i == idx_que->bitmap_cnt)
- return ENOMEM;
+ return -ENOMEM;
bit_num = ffsl(idx_que->bitmap[i]);
idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
- return i * BIT_CNT_PER_LONG + (bit_num - 1);
+ *wqe_idx = i * BIT_CNT_PER_LONG + (bit_num - 1);
+
+ /* If wqe_cnt is less than BIT_CNT_PER_LONG, wqe_idx may be greater
+ * than wqe_cnt.
+ */
+ if (*wqe_idx >= srq->wqe_cnt)
+ return -ENOMEM;
+
+ return 0;
}
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
@@ -1580,9 +1589,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
break;
}
- wqe_idx = find_empty_entry(&srq->idx_que);
- if (wqe_idx < 0 || wqe_idx >= srq->wqe_cnt) {
- ret = -ENOMEM;
+ ret = get_wqe_idx(srq, &wqe_idx);
+ if (ret) {
*bad_wr = wr;
break;
}
--
2.33.0
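
The failure mode is easiest to see standalone. Below is a hedged C sketch
(hypothetical sizes, reduced to a single bitmap word) of why an index from
ffsl() must still be range-checked:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>     /* ffsl() */

/* With wqe_cnt = 4 the allocator still owns a 64-bit bitmap word, and
 * init sets *all* of its bits, so after four allocations ffsl() would
 * happily hand out bit index 4 -- a slot past the end of the queue. */
static int get_idx(unsigned long *bitmap, unsigned int wqe_cnt, int *wqe_idx)
{
        int bit_num = ffsl(*bitmap);

        if (!bit_num)
                return -ENOMEM;

        *bitmap &= ~(1UL << (bit_num - 1));
        *wqe_idx = bit_num - 1;

        /* the added check: indexes >= wqe_cnt are not real WQEs */
        if (*wqe_idx >= (int)wqe_cnt)
                return -ENOMEM;

        return 0;
}

int main(void)
{
        unsigned long bitmap = ~0UL;    /* all 64 bits set at init */
        int idx, i;

        for (i = 0; i < 6; i++)
                if (!get_idx(&bitmap, 4, &idx))
                        printf("wqe_idx %d\n", idx);    /* prints 0..3 */
        return 0;
}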

@@ -1,33 +0,0 @@
From 91034654bdb2fd6e1fce81b4c1aea41bb4b6bf98 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:33 +0800
Subject: [PATCH 6/8] libhns: Don't create an RQ for a QP that is associated
with an SRQ
If a QP is associated with an SRQ, its RQ should not be created.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 9b4934b9..125858d2 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -760,6 +760,11 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
cap->max_recv_sge > ctx->max_sge)
return -EINVAL;
+ if (attr->srq) {
+ cap->max_recv_wr = 0;
+ cap->max_recv_sge = 0;
+ }
+
min_wqe_num = hr_dev->hw_version == HNS_ROCE_HW_VER1 ?
HNS_ROCE_V1_MIN_WQE_NUM : HNS_ROCE_V2_MIN_WQE_NUM;
--
2.33.0

@@ -1,368 +0,0 @@
From 64c66455fef1c908cc8f06a2b71aa2fd71806218 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 15 Dec 2021 16:42:30 +0800
Subject: [PATCH 7/8] libhns: Add support for direct wqe
The current WQE write mechanism writes the WQE to DDR first and then notifies
the hardware through a doorbell to read the data. Direct WQE is a mechanism
that fills the WQE directly into the hardware: under light load, the WQE is
written into the PCIe BAR space of the device, which saves one memory access
and therefore reduces latency. SIMD instructions allow the CPU to write 512
bits to device memory at a time, so they can be used for posting a direct WQE.
The post-send flow on HIP08/09:
+-----------+
| post send |
+-----+-----+
      |
+-----+-----+
| write WQE |
+-----+-----+
      |
      | udma_to_device_barrier()
      |
+-----+-----+  Y  +-----+-----+  N
|   HIP09 ? +-----+ multi WR ?+-------------+
+-----+-----+     +-----+-----+             |
      | N               | Y                 |
+-----+-----+     +-----+-----+     +--------+--------+
|  ring DB  |     |  ring DB  |     |direct WQE (ST4) |
+-----------+     +-----------+     +-----------------+
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.h | 5 +++-
providers/hns/hns_roce_u_hw_v2.c | 43 ++++++++++++++++++++++++++------
providers/hns/hns_roce_u_hw_v2.h | 31 +++++++++++++----------
providers/hns/hns_roce_u_verbs.c | 26 +++++++++++++++++--
util/mmio.h | 27 +++++++++++++++++++-
5 files changed, 107 insertions(+), 25 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index b3f48113..37711363 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -80,6 +80,8 @@
#define INVALID_SGE_LENGTH 0x80000000
+#define HNS_ROCE_DWQE_PAGE_SIZE 65536
+
#define HNS_ROCE_ADDRESS_MASK 0xFFFFFFFF
#define HNS_ROCE_ADDRESS_SHIFT 32
@@ -279,13 +281,14 @@ struct hns_roce_qp {
struct hns_roce_sge_ex ex_sge;
unsigned int next_sge;
int port_num;
- int sl;
+ uint8_t sl;
unsigned int qkey;
enum ibv_mtu path_mtu;
struct hns_roce_rinl_buf rq_rinl_buf;
unsigned long flags;
int refcnt; /* specially used for XRC */
+ void *dwqe_page;
};
struct hns_roce_av {
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 1169b64b..f102fd61 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -33,6 +33,7 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
+#include <sys/mman.h>
#include "hns_roce_u.h"
#include "hns_roce_u_db.h"
#include "hns_roce_u_hw_v2.h"
@@ -297,20 +298,40 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
}
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
- unsigned int qpn, unsigned int sl,
- unsigned int sq_head)
+ struct hns_roce_qp *qp)
{
struct hns_roce_db sq_db = {};
- sq_db.byte_4 = htole32(qpn);
+ sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num);
roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
HNS_ROCE_V2_SQ_DB);
- sq_db.parameter = htole32(sq_head);
- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl);
+ sq_db.parameter = htole32(qp->sq.head);
+ roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
}
+static void hns_roce_write512(uint64_t *dest, uint64_t *val)
+{
+ mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
+}
+
+static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
+{
+ struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
+
+ /* All kinds of DirectWQE have the same header field layout */
+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1);
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M,
+ RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl);
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M,
+ RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT);
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
+ RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
+
+ hns_roce_write512(qp->dwqe_page, wqe);
+}
+
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
struct hns_roce_cq *cq)
{
@@ -339,8 +360,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
return NULL;
}
-static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx,
- struct hns_roce_qp *qp)
+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
{
uint32_t qpn = qp->verbs_qp.qp.qp_num;
uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
@@ -1196,6 +1216,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
break;
case IBV_QPT_UD:
ret = set_ud_wqe(wqe, qp, wr, nreq, &sge_info);
+ qp->sl = to_hr_ah(wr->wr.ud.ah)->av.sl;
break;
default:
ret = EINVAL;
@@ -1214,7 +1235,10 @@ out:
udma_to_device_barrier();
- hns_roce_update_sq_db(ctx, ibvqp->qp_num, qp->sl, qp->sq.head);
+ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ hns_roce_write_dwqe(qp, wqe);
+ else
+ hns_roce_update_sq_db(ctx, qp);
if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
*(qp->sdb) = qp->sq.head & 0xffff;
@@ -1506,6 +1530,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
if (ret)
return ret;
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ munmap(qp->dwqe_page, HNS_ROCE_DWQE_PAGE_SIZE);
+
hns_roce_v2_clear_qp(ctx, qp);
hns_roce_lock_cqs(ibqp);
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index c13d82e3..af72cd70 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -40,6 +40,8 @@
#define HNS_ROCE_CMDSN_MASK 0x3
+#define HNS_ROCE_SL_SHIFT 2
+
/* V2 REG DEFINITION */
#define ROCEE_VF_DB_CFG0_OFFSET 0x0230
@@ -133,6 +135,8 @@ struct hns_roce_db {
#define DB_BYTE_4_CMD_S 24
#define DB_BYTE_4_CMD_M GENMASK(27, 24)
+#define DB_BYTE_4_FLAG_S 31
+
#define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0
#define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0)
@@ -216,8 +220,16 @@ struct hns_roce_rc_sq_wqe {
};
#define RC_SQ_WQE_BYTE_4_OPCODE_S 0
-#define RC_SQ_WQE_BYTE_4_OPCODE_M \
- (((1UL << 5) - 1) << RC_SQ_WQE_BYTE_4_OPCODE_S)
+#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
+
+#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5
+#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
+
+#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13
+#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
+
+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15
+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
#define RC_SQ_WQE_BYTE_4_OWNER_S 7
@@ -239,6 +251,8 @@ struct hns_roce_rc_sq_wqe {
#define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22
+#define RC_SQ_WQE_BYTE_4_FLAG_S 31
+
#define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0
#define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \
(((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S)
@@ -311,23 +325,12 @@ struct hns_roce_ud_sq_wqe {
#define UD_SQ_WQE_OPCODE_S 0
#define UD_SQ_WQE_OPCODE_M GENMASK(4, 0)
-#define UD_SQ_WQE_DB_SL_L_S 5
-#define UD_SQ_WQE_DB_SL_L_M GENMASK(6, 5)
-
-#define UD_SQ_WQE_DB_SL_H_S 13
-#define UD_SQ_WQE_DB_SL_H_M GENMASK(14, 13)
-
-#define UD_SQ_WQE_INDEX_S 15
-#define UD_SQ_WQE_INDEX_M GENMASK(30, 15)
-
#define UD_SQ_WQE_OWNER_S 7
#define UD_SQ_WQE_CQE_S 8
#define UD_SQ_WQE_SE_S 11
-#define UD_SQ_WQE_FLAG_S 31
-
#define UD_SQ_WQE_PD_S 0
#define UD_SQ_WQE_PD_M GENMASK(23, 0)
@@ -376,4 +379,6 @@ struct hns_roce_ud_sq_wqe {
#define MAX_SERVICE_LEVEL 0x7
+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
+
#endif /* _HNS_ROCE_U_HW_V2_H */
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 125858d2..fc902815 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1076,7 +1076,8 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp,
- struct hns_roce_context *ctx)
+ struct hns_roce_context *ctx,
+ uint64_t *dwqe_mmap_key)
{
struct hns_roce_create_qp_ex_resp resp_ex = {};
struct hns_roce_create_qp_ex cmd_ex = {};
@@ -1093,6 +1094,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
&resp_ex.ibv_resp, sizeof(resp_ex));
qp->flags = resp_ex.drv_payload.cap_flags;
+ *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key;
return ret;
}
@@ -1144,11 +1146,23 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
return ret;
}
+static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
+ uint64_t dwqe_mmap_key)
+{
+ qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE,
+ MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key);
+ if (qp->dwqe_page == MAP_FAILED)
+ return -EINVAL;
+
+ return 0;
+}
+
static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
struct ibv_qp_init_attr_ex *attr)
{
struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
struct hns_roce_qp *qp;
+ uint64_t dwqe_mmap_key;
int ret;
ret = verify_qp_create_attr(context, attr);
@@ -1167,7 +1181,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
if (ret)
goto err_buf;
- ret = qp_exec_create_cmd(attr, qp, context);
+ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key);
if (ret)
goto err_cmd;
@@ -1175,10 +1189,18 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
if (ret)
goto err_store;
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
+ ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key);
+ if (ret)
+ goto err_dwqe;
+ }
+
qp_setup_config(attr, qp, context);
return &qp->verbs_qp.qp;
+err_dwqe:
+ hns_roce_v2_clear_qp(context, qp);
err_store:
ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
err_cmd:
diff --git a/util/mmio.h b/util/mmio.h
index 101af9dd..01d1455e 100644
--- a/util/mmio.h
+++ b/util/mmio.h
@@ -210,8 +210,33 @@ static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
{
s390_mmio_write(dest, src, bytecnt);
}
-#else
+#elif defined(__aarch64__) || defined(__arm__)
+#include <arm_neon.h>
+
+static inline void _mmio_memcpy_x64_64b(void *dest, const void *src)
+{
+ vst4q_u64(dest, vld4q_u64(src));
+}
+
+static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
+{
+ do {
+ _mmio_memcpy_x64_64b(dest, src);
+ bytecnt -= sizeof(uint64x2x4_t);
+ src += sizeof(uint64x2x4_t);
+ } while (bytecnt > 0);
+}
+
+#define mmio_memcpy_x64(dest, src, bytecount) \
+ ({ \
+ if (__builtin_constant_p((bytecount) == 64)) \
+ _mmio_memcpy_x64_64b((dest), (src)); \
+ else \
+ _mmio_memcpy_x64((dest), (src), (bytecount)); \
+ })
+
+#else
/* Transfer is some multiple of 64 bytes */
static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
{
--
2.33.0
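
A hedged end-to-end sketch of the userspace side of this mechanism
(hypothetical helper names; plain memcpy() stands in for the LD4/ST4-based
mmio_memcpy_x64() added above):

#define _GNU_SOURCE
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

#define DWQE_PAGE_SIZE  65536   /* mirrors HNS_ROCE_DWQE_PAGE_SIZE */
#define RC_WQE_SIZE     64      /* one full WQE, written in one burst */

/* Map the per-QP doorbell page the kernel advertised through the
 * dwqe_mmap_key field added to hns_roce_ib_create_qp_resp in patch 1. */
static void *map_dwqe(int cmd_fd, uint64_t dwqe_mmap_key)
{
        void *page = mmap(NULL, DWQE_PAGE_SIZE, PROT_WRITE, MAP_SHARED,
                          cmd_fd, dwqe_mmap_key);

        return page == MAP_FAILED ? NULL : page;
}

/* Light-load fast path: the WQE itself lands in the PCIe BAR, so no
 * doorbell ring and no device DMA read of host memory is needed. */
static void post_direct_wqe(void *dwqe_page, const void *wqe)
{
        memcpy(dwqe_page, wqe, RC_WQE_SIZE);
}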

@@ -1,70 +0,0 @@
From 608c142e7cbac2a6c02071022fe87b081a6ddc4f Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Tue, 21 Dec 2021 21:38:08 +0800
Subject: [PATCH 8/8] libhns: Use new SQ doorbell register for HIP09
HIP09 provides a new BAR space for the SQ doorbell. Each SQ doorbell has an
independent 64KB BAR space, which is shared with the direct WQE.
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.h | 1 +
providers/hns/hns_roce_u_hw_v2.c | 4 ++--
providers/hns/hns_roce_u_verbs.c | 5 +++++
3 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 37711363..460363b7 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -238,6 +238,7 @@ struct hns_roce_wq {
unsigned int wqe_shift;
unsigned int shift; /* wq size is 2^shift */
int offset;
+ void *db_reg;
};
/* record the result of sge process */
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index f102fd61..9cbc0aac 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
sq_db.parameter = htole32(qp->sq.head);
roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
+ hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
}
static void hns_roce_write512(uint64_t *dest, uint64_t *val)
@@ -329,7 +329,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
- hns_roce_write512(qp->dwqe_page, wqe);
+ hns_roce_write512(qp->sq.db_reg, wqe);
}
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index fc902815..c5022c83 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1117,6 +1117,11 @@ static void qp_setup_config(struct ibv_qp_init_attr_ex *attr,
}
qp->max_inline_data = attr->cap.max_inline_data;
+
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp->sq.db_reg = qp->dwqe_page;
+ else
+ qp->sq.db_reg = ctx->uar + ROCEE_VF_DB_CFG0_OFFSET;
}
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
--
2.33.0

@@ -1,70 +0,0 @@
From 5cc1a047c4d71ced86b0f71f66adf12475a3c788 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:35 +0800
Subject: libhns: Bugfix for checking whether the SRQ is full when posting WR
If the user posts a list of WRs, the head is not updated inside the for loop
in time, so the check "if (head == tail)" becomes invalid.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 17 +++++++++++++----
providers/hns/hns_roce_u_verbs.c | 2 +-
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 82124082..0c15bdbe 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1527,6 +1527,15 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
return ret;
}
+static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int cur;
+
+ cur = idx_que->head - idx_que->tail;
+ return cur >= srq->wqe_cnt - 1;
+}
+
static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
@@ -1577,14 +1586,14 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (wr->num_sge > max_sge) {
- ret = -EINVAL;
+ if (hns_roce_v2_srqwq_overflow(srq)) {
+ ret = -ENOMEM;
*bad_wr = wr;
break;
}
- if (srq->idx_que.head == srq->idx_que.tail) {
- ret = -ENOMEM;
+ if (wr->num_sge > max_sge) {
+ ret = -EINVAL;
*bad_wr = wr;
break;
}
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 3abf7b48..dace35fd 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -492,7 +492,7 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
idx_que->bitmap[i] = ~(0UL);
idx_que->head = 0;
- idx_que->tail = srq->wqe_cnt - 1;
+ idx_que->tail = 0;
return 0;
}
--
2.30.0
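
The replacement fullness test relies on head and tail being free-running
unsigned counters, so their difference stays correct even after either one
wraps around. A small self-checking sketch:

#include <assert.h>

/* Mirrors hns_roce_v2_srqwq_overflow(): one slot stays reserved at this
 * point in the series (a later patch in this set drops the reservation
 * and compares against wqe_cnt instead). */
static int srqwq_overflow(unsigned int head, unsigned int tail,
                          unsigned int wqe_cnt)
{
        return head - tail >= wqe_cnt - 1;
}

int main(void)
{
        assert(srqwq_overflow(7, 0, 8));            /* 7 outstanding: full */
        assert(!srqwq_overflow(6, 0, 8));           /* room for one more */
        assert(!srqwq_overflow(2, 0xfffffffeu, 8)); /* wrapped: 4 in use */
        return 0;
}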

@@ -1,30 +0,0 @@
From a79800afbbc48e5c5274bf3fc0e890705b3a596d Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:36 +0800
Subject: libhns: Allow users to create 0-depth SRQs
Users are allowed to create 0-depth SRQs, so the check for whether max_wr
is zero should be removed.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index dace35fd..2d1a6de3 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -525,7 +525,7 @@ static int hns_roce_verify_srq(struct hns_roce_context *context,
init_attr->srq_type != IBV_SRQT_XRC)
return -EINVAL;
- if (!init_attr->attr.max_wr || !init_attr->attr.max_sge ||
+ if (!init_attr->attr.max_sge ||
init_attr->attr.max_wr > context->max_srq_wr ||
init_attr->attr.max_sge > context->max_srq_sge)
return -EINVAL;
--
2.30.0

@@ -1,176 +0,0 @@
From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:37 +0800
Subject: libhns: Refactor the process of post_srq_recv
An SRQ is a shared queue that mainly consists of four parts:
1. wqe buf: stores the WQE data.
2. wqe_idx buf: the CQEs of an SRQ are not generated in WQE order, so the
wqe_idx corresponding to each idle WQE is placed in a FIFO queue that
instructs the hardware to fetch the corresponding WQE.
3. bitmap: used to allocate and release wqe_idx. When the user posts a new
WR, the driver finds the index of an idle WQE in the bitmap; when that WQE's
CQE is generated, the driver releases the index.
4. wr_id buf: stores the user's wr_id and returns it to the user when
ibv_poll_cq() is invoked.
After the refactoring, the roles of the four parts are clearer.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++-------------
1 file changed, 57 insertions(+), 38 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 0c15bdbe..b622eaef 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
}
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
{
return srq->buf.buf + (n << srq->wqe_shift);
}
@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
return cur >= srq->wqe_cnt - 1;
}
-static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
+static int check_post_srq_valid(struct hns_roce_srq *srq,
+ struct ibv_recv_wr *wr)
+{
+ unsigned int max_sge = srq->max_gs - srq->rsv_sge;
+
+ if (hns_roce_v2_srqwq_overflow(srq))
+ return -ENOMEM;
+
+ if (wr->num_sge > max_sge)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
int bit_num;
@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
return 0;
}
+static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx,
+ struct ibv_recv_wr *wr)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ int i;
+
+ dseg = get_srq_wqe(srq, wqe_idx);
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ dseg[i].len = htole32(wr->sg_list[i].length);
+ dseg[i].lkey = htole32(wr->sg_list[i].lkey);
+ dseg[i].addr = htole64(wr->sg_list[i].addr);
+ }
+
+ /* hw stop reading when identify the last one */
+ if (srq->rsv_sge) {
+ dseg[i].len = htole32(INVALID_SGE_LENGTH);
+ dseg[i].lkey = htole32(0x0);
+ dseg[i].addr = 0;
+ }
+}
+
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int head;
+ __le32 *idx_buf;
+
+ head = idx_que->head & (srq->wqe_cnt - 1);
+
+ idx_buf = get_idx_buf(idx_que, head);
+ *idx_buf = htole32(wqe_idx);
+
+ idx_que->head++;
+}
+
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
struct ibv_recv_wr *wr,
struct ibv_recv_wr **bad_wr)
{
struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context);
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
- struct hns_roce_v2_wqe_data_seg *dseg;
struct hns_roce_db srq_db;
- unsigned int max_sge;
- __le32 *srq_idx;
+ unsigned int wqe_idx;
int ret = 0;
- int wqe_idx;
- void *wqe;
int nreq;
- int ind;
- int i;
pthread_spin_lock(&srq->lock);
- /* current idx of srqwq */
- ind = srq->idx_que.head & (srq->wqe_cnt - 1);
-
- max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_v2_srqwq_overflow(srq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- break;
- }
-
- if (wr->num_sge > max_sge) {
- ret = -EINVAL;
+ ret = check_post_srq_valid(srq, wr);
+ if (ret) {
*bad_wr = wr;
break;
}
@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
break;
}
- wqe = get_srq_wqe(srq, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
- for (i = 0; i < wr->num_sge; ++i) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
-
- /* hw stop reading when identify the last one */
- if (srq->rsv_sge)
- set_ending_data_seg(dseg);
-
- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind);
- *srq_idx = htole32(wqe_idx);
+ fill_srq_wqe(srq, wqe_idx, wr);
+ fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (srq->wqe_cnt - 1);
}
if (nreq) {
- srq->idx_que.head += nreq;
-
/*
* Make sure that descriptors are written before
* we write doorbell record.
--
2.30.0
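
Condensed into one hypothetical struct, the four cooperating pieces listed in
the commit message look like this (the real fields are spread across
hns_roce_srq and hns_roce_idx_que in hns_roce_u.h):

#include <linux/types.h>
#include <stdint.h>

struct srq_parts_sketch {
        void *wqe_buf;          /* 1. WQE payload (scatter lists) */
        __le32 *wqe_idx_buf;    /* 2. FIFO of indexes handed to HW */
        unsigned long *bitmap;  /* 3. allocator of free WQE indexes */
        uint64_t *wr_id_buf;    /* 4. wr_ids returned via ibv_poll_cq() */
};

After the refactor, post_srq_recv() touches them in exactly that order:
check_post_srq_valid(), get_wqe_idx() (bitmap), fill_srq_wqe() (wqe buf),
fill_wqe_idx() (index FIFO), then the wr_id store.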

@@ -1,33 +0,0 @@
From a18b0ee409d3382aa556b8f06a6cd6bfbef3f5c8 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:38 +0800
Subject: libhns: Set srqlimit to 0 when creating SRQ
According to the IB specification, the srq_limit parameter should not be
configured when creating an SRQ, but libhns currently does not set
attr.srq_limit to 0. As a result, when the attr.srq_limit provided by the
user is not 0, the value of attr.srq_limit returned to the user will differ
from the one obtained by ibv_query_srq(). Therefore, the driver should set
attr.srq_limit to 0 when creating an SRQ.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 2d1a6de3..107da753 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -580,6 +580,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
attr->max_sge = srq->max_gs;
+ attr->srq_limit = 0;
ret = hns_roce_create_idx_que(srq);
if (ret)
--
2.30.0

@@ -1,367 +0,0 @@
From b914c76318f5b95e3157c3cbf1ccb49ec6d27635 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:39 +0800
Subject: libhns: Refactor the process of create_srq
Reorganize create_srq() into several sub-functions to make the process
clearer.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u.h | 7 +-
providers/hns/hns_roce_u_hw_v2.c | 2 +-
providers/hns/hns_roce_u_verbs.c | 178 ++++++++++++++++++-------------
3 files changed, 105 insertions(+), 82 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index b3f48113..a437727c 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -211,7 +211,8 @@ struct hns_roce_idx_que {
struct hns_roce_srq {
struct verbs_srq verbs_srq;
- struct hns_roce_buf buf;
+ struct hns_roce_idx_que idx_que;
+ struct hns_roce_buf wqe_buf;
pthread_spinlock_t lock;
unsigned long *wrid;
unsigned int srqn;
@@ -221,7 +222,6 @@ struct hns_roce_srq {
unsigned int wqe_shift;
unsigned int *db;
unsigned short counter;
- struct hns_roce_idx_que idx_que;
};
struct hns_roce_wq {
@@ -343,8 +343,7 @@ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq)
{
- return container_of(container_of(ibv_srq, struct verbs_srq, srq),
- struct hns_roce_srq, verbs_srq);
+ return container_of(ibv_srq, struct hns_roce_srq, verbs_srq.srq);
}
static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index b622eaef..d4e7e4f9 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -244,7 +244,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
{
- return srq->buf.buf + (n << srq->wqe_shift);
+ return srq->wqe_buf.buf + (n << srq->wqe_shift);
}
static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 107da753..75b9e530 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -432,17 +432,23 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx,
uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >>
ctx->srq_table_shift;
+ pthread_mutex_lock(&ctx->srq_table_mutex);
+
if (!ctx->srq_table[tind].refcnt) {
ctx->srq_table[tind].table =
calloc(ctx->srq_table_mask + 1,
sizeof(struct hns_roce_srq *));
- if (!ctx->srq_table[tind].table)
+ if (!ctx->srq_table[tind].table) {
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
return -ENOMEM;
+ }
}
++ctx->srq_table[tind].refcnt;
ctx->srq_table[tind].table[srq->srqn & ctx->srq_table_mask] = srq;
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
+
return 0;
}
@@ -461,13 +467,46 @@ static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn)
{
uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
+ pthread_mutex_lock(&ctx->srq_table_mutex);
+
if (!--ctx->srq_table[tind].refcnt)
free(ctx->srq_table[tind].table);
else
ctx->srq_table[tind].table[srqn & ctx->srq_table_mask] = NULL;
+
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
+}
+
+static int verify_srq_create_attr(struct hns_roce_context *context,
+ struct ibv_srq_init_attr_ex *attr)
+{
+ if (attr->srq_type != IBV_SRQT_BASIC &&
+ attr->srq_type != IBV_SRQT_XRC)
+ return -EINVAL;
+
+ if (!attr->attr.max_sge ||
+ attr->attr.max_wr > context->max_srq_wr ||
+ attr->attr.max_sge > context->max_srq_sge)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
+ struct ibv_srq_init_attr_ex *attr)
+{
+ if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
+ srq->rsv_sge = 1;
+
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
+ srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
+ srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
+ srq->max_gs));
+ attr->attr.max_sge = srq->max_gs;
+ attr->attr.srq_limit = 0;
}
-static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
+static int alloc_srq_idx_que(struct hns_roce_srq *srq)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
unsigned int buf_size;
@@ -478,13 +517,13 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
BIT_CNT_PER_LONG;
idx_que->bitmap = calloc(idx_que->bitmap_cnt, sizeof(unsigned long));
if (!idx_que->bitmap)
- return ENOMEM;
+ return -ENOMEM;
buf_size = to_hr_hem_entries_size(srq->wqe_cnt, idx_que->entry_shift);
if (hns_roce_alloc_buf(&idx_que->buf, buf_size, HNS_HW_PAGE_SIZE)) {
free(idx_que->bitmap);
idx_que->bitmap = NULL;
- return ENOMEM;
+ return -ENOMEM;
}
/* init the idx_que bitmap */
@@ -497,40 +536,48 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
return 0;
}
-static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq)
+static int alloc_srq_wqe_buf(struct hns_roce_srq *srq)
{
- int srq_buf_size;
+ int buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
- srq->wrid = calloc(srq->wqe_cnt, sizeof(unsigned long));
- if (!srq->wrid)
- return ENOMEM;
+ return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE);
+}
- srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
- srq->max_gs));
- srq_buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
+static int alloc_srq_buf(struct hns_roce_srq *srq)
+{
+ int ret;
- /* allocate srq wqe buf */
- if (hns_roce_alloc_buf(&srq->buf, srq_buf_size, HNS_HW_PAGE_SIZE)) {
- free(srq->wrid);
- return ENOMEM;
+ ret = alloc_srq_idx_que(srq);
+ if (ret)
+ return ret;
+
+ ret = alloc_srq_wqe_buf(srq);
+ if (ret)
+ goto err_idx_que;
+
+ srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid));
+ if (!srq->wrid) {
+ ret = -ENOMEM;
+ goto err_wqe_buf;
}
return 0;
-}
-static int hns_roce_verify_srq(struct hns_roce_context *context,
- struct ibv_srq_init_attr_ex *init_attr)
-{
- if (init_attr->srq_type != IBV_SRQT_BASIC &&
- init_attr->srq_type != IBV_SRQT_XRC)
- return -EINVAL;
+err_wqe_buf:
+ hns_roce_free_buf(&srq->wqe_buf);
+err_idx_que:
+ hns_roce_free_buf(&srq->idx_que.buf);
+ free(srq->idx_que.bitmap);
- if (!init_attr->attr.max_sge ||
- init_attr->attr.max_wr > context->max_srq_wr ||
- init_attr->attr.max_sge > context->max_srq_sge)
- return -EINVAL;
+ return ret;
+}
- return 0;
+static void free_srq_buf(struct hns_roce_srq *srq)
+{
+ free(srq->wrid);
+ hns_roce_free_buf(&srq->wqe_buf);
+ hns_roce_free_buf(&srq->idx_que.buf);
+ free(srq->idx_que.bitmap);
}
static int exec_srq_create_cmd(struct ibv_context *context,
@@ -541,7 +588,7 @@ static int exec_srq_create_cmd(struct ibv_context *context,
struct hns_roce_create_srq_ex cmd_ex = {};
int ret;
- cmd_ex.buf_addr = (uintptr_t)srq->buf.buf;
+ cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf;
cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf;
cmd_ex.db_addr = (uintptr_t)srq->db;
@@ -559,57 +606,44 @@ static int exec_srq_create_cmd(struct ibv_context *context,
static struct ibv_srq *create_srq(struct ibv_context *context,
struct ibv_srq_init_attr_ex *init_attr)
{
- struct hns_roce_context *ctx = to_hr_ctx(context);
- struct ibv_srq_attr *attr = &init_attr->attr;
+ struct hns_roce_context *hr_ctx = to_hr_ctx(context);
struct hns_roce_srq *srq;
int ret;
- if (hns_roce_verify_srq(ctx, init_attr))
- return NULL;
+ ret = verify_srq_create_attr(hr_ctx, init_attr);
+ if (ret)
+ goto err;
srq = calloc(1, sizeof(*srq));
- if (!srq)
- return NULL;
+ if (!srq) {
+ ret = -ENOMEM;
+ goto err;
+ }
if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
goto err_free_srq;
- if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
- srq->rsv_sge = 1;
-
- srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
- srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
- attr->max_sge = srq->max_gs;
- attr->srq_limit = 0;
-
- ret = hns_roce_create_idx_que(srq);
- if (ret)
+ set_srq_param(context, srq, init_attr);
+ if (alloc_srq_buf(srq))
goto err_free_srq;
- ret = hns_roce_alloc_srq_buf(srq);
- if (ret)
- goto err_idx_que;
-
- srq->db = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
+ srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB);
if (!srq->db)
goto err_srq_buf;
- *(srq->db) = 0;
-
- pthread_mutex_lock(&ctx->srq_table_mutex);
+ *srq->db = 0;
ret = exec_srq_create_cmd(context, srq, init_attr);
if (ret)
goto err_srq_db;
- ret = hns_roce_store_srq(ctx, srq);
+ ret = hns_roce_store_srq(hr_ctx, srq);
if (ret)
goto err_destroy_srq;
- pthread_mutex_unlock(&ctx->srq_table_mutex);
-
- srq->max_gs = attr->max_sge;
- attr->max_sge = min(attr->max_sge - srq->rsv_sge, ctx->max_srq_sge);
+ srq->max_gs = init_attr->attr.max_sge;
+ init_attr->attr.max_sge =
+ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
return &srq->verbs_srq.srq;
@@ -617,20 +651,19 @@ err_destroy_srq:
ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
err_srq_db:
- pthread_mutex_unlock(&ctx->srq_table_mutex);
- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
+ hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
err_srq_buf:
- free(srq->wrid);
- hns_roce_free_buf(&srq->buf);
-
-err_idx_que:
- free(srq->idx_que.bitmap);
- hns_roce_free_buf(&srq->idx_que.buf);
+ free_srq_buf(srq);
err_free_srq:
free(srq);
+err:
+ if (ret < 0)
+ ret = -ret;
+
+ errno = ret;
return NULL;
}
@@ -690,23 +723,14 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
int ret;
- pthread_mutex_lock(&ctx->srq_table_mutex);
-
ret = ibv_cmd_destroy_srq(ibv_srq);
- if (ret) {
- pthread_mutex_unlock(&ctx->srq_table_mutex);
+ if (ret)
return ret;
- }
hns_roce_clear_srq(ctx, srq->srqn);
- pthread_mutex_unlock(&ctx->srq_table_mutex);
-
hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
- hns_roce_free_buf(&srq->buf);
- free(srq->wrid);
- hns_roce_free_buf(&srq->idx_que.buf);
- free(srq->idx_que.bitmap);
+ free_srq_buf(srq);
free(srq);
return 0;
--
2.30.0

@@ -1,69 +0,0 @@
From d68ac72a8e4f2cf9754d3fcbbb8ff2a03e514c2f Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:40 +0800
Subject: libhns: Remove the reserved wqe of SRQ
There is an unnecessarily reserved WQE in the SRQ; it should be removed.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u.h | 1 +
providers/hns/hns_roce_u_hw_v2.c | 4 +---
providers/hns/hns_roce_u_verbs.c | 5 ++++-
3 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index a437727c..0d7abd81 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -64,6 +64,7 @@
#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_V1_MIN_WQE_NUM 0x20
#define HNS_ROCE_V2_MIN_WQE_NUM 0x40
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
#define HNS_ROCE_CQE_SIZE 0x20
#define HNS_ROCE_V3_CQE_SIZE 0x40
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index d4e7e4f9..2fb6cdaf 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1530,10 +1530,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
- unsigned int cur;
- cur = idx_que->head - idx_que->tail;
- return cur >= srq->wqe_cnt - 1;
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
}
static int check_post_srq_valid(struct hns_roce_srq *srq,
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 75b9e530..4847639b 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -489,6 +489,9 @@ static int verify_srq_create_attr(struct hns_roce_context *context,
attr->attr.max_sge > context->max_srq_sge)
return -EINVAL;
+ attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr,
+ HNS_ROCE_MIN_SRQ_WQE_NUM);
+
return 0;
}
@@ -498,7 +501,7 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
srq->rsv_sge = 1;
- srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr);
srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
srq->max_gs));
--
2.30.0
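
The arithmetic effect is easiest to see with concrete numbers (illustrative
only): both the "+ 1" at allocation time and the "- 1" in the overflow check
existed solely to pay for the reserved slot.

#include <assert.h>

static unsigned int roundup_pow_of_two(unsigned int n)
{
        unsigned int r = 1;

        while (r < n)
                r <<= 1;
        return r;
}

int main(void)
{
        /* max_wr = 8, before: 16 slots allocated, only 15 usable */
        assert(roundup_pow_of_two(8 + 1) == 16);
        /* max_wr = 8, after: 8 slots allocated, all 8 usable */
        assert(roundup_pow_of_two(8) == 8);
        return 0;
}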

@@ -1,89 +0,0 @@
From 11c81d0e3a987f95b74e03b5e592a45029302f1d Mon Sep 17 00:00:00 2001
From: Weihang Li <liweihang@huawei.com>
Date: Fri, 14 May 2021 10:02:56 +0800
Subject: libhns: Refactor process of setting extended sge
Refactor and encapsulate the logic that computes the number of extended SGEs
a WQE can use, to make it easier to understand.
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++------------
1 file changed, 29 insertions(+), 16 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 30ab072a..a8508fc5 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -920,31 +920,44 @@ err_alloc:
return -ENOMEM;
}
-static void set_extend_sge_param(struct hns_roce_device *hr_dev,
- struct ibv_qp_init_attr_ex *attr,
- struct hns_roce_qp *qp, unsigned int wr_cnt)
+static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
{
- int cnt = 0;
+ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
+ return qp->sq.max_gs;
+
+ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
+ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+
+ return 0;
+}
+
+static void set_ext_sge_param(struct hns_roce_device *hr_dev,
+ struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp, unsigned int wr_cnt)
+{
+ unsigned int total_sge_cnt;
+ unsigned int wqe_sge_cnt;
+
+ qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
if (hr_dev->hw_version == HNS_ROCE_HW_VER1) {
qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE;
- } else {
- qp->sq.max_gs = attr->cap.max_send_sge;
- if (attr->qp_type == IBV_QPT_UD)
- cnt = roundup_pow_of_two(wr_cnt * qp->sq.max_gs);
- else if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- cnt = roundup_pow_of_two(wr_cnt *
- (qp->sq.max_gs -
- HNS_ROCE_SGE_IN_WQE));
+ return;
}
- qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ qp->sq.max_gs = attr->cap.max_send_sge;
+
+ wqe_sge_cnt = get_wqe_ext_sge_cnt(qp);
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
- qp->ex_sge.sge_cnt = cnt ?
- max(cnt, HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0;
+ if (wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt);
+ qp->ex_sge.sge_cnt =
+ max(total_sge_cnt,
+ (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ }
}
static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
@@ -988,7 +1001,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
qp->sq.wqe_cnt = cnt;
qp->sq.shift = hr_ilog32(cnt);
- set_extend_sge_param(hr_dev, attr, qp, cnt);
+ set_ext_sge_param(hr_dev, attr, qp, cnt);
qp->sq.max_post = min(ctx->max_qp_wr, cnt);
qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs);
--
2.30.0
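
The rule that the refactor isolates into get_wqe_ext_sge_cnt() fits in a few
lines. A sketch (HNS_ROCE_SGE_IN_WQE, the number of SGE slots inside an RC
WQE, is assumed to be 2 here):

#define HNS_ROCE_SGE_IN_WQE 2

/* UD WQEs keep no inline SGEs, so every SGE goes to the extended area;
 * RC WQEs spill only what exceeds their inline slots. */
static unsigned int wqe_ext_sge_cnt(int is_ud, unsigned int max_gs)
{
        if (is_ud)
                return max_gs;

        return max_gs > HNS_ROCE_SGE_IN_WQE ?
               max_gs - HNS_ROCE_SGE_IN_WQE : 0;
}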

@@ -1,139 +0,0 @@
From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Fri, 14 May 2021 10:02:57 +0800
Subject: libhns: Optimize set_sge process
Use local variables to avoid frequent ldr/str operations. And because UD's
process of setting sge is more simple then RC, set_sge() can be splited
into two functions for compiler optimization.
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++---------
1 file changed, 61 insertions(+), 22 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4988943a..dc79a6f8 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
return 0;
}
-static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
- struct hns_roce_qp *qp, struct ibv_send_wr *wr,
- struct hns_roce_sge_info *sge_info)
+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ struct hns_roce_sge_info *sge_info)
{
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
+ uint32_t index = sge_info->start_idx;
+ struct ibv_sge *sge = wr->sg_list;
+ uint32_t len = 0;
+ uint32_t cnt = 0;
+ int flag;
int i;
- sge_info->valid_num = 0;
- sge_info->total_len = 0;
+ flag = (wr->send_flags & IBV_SEND_INLINE &&
+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
- for (i = 0; i < wr->num_sge; i++) {
- if (unlikely(!wr->sg_list[i].length))
+ for (i = 0; i < wr->num_sge; i++, sge++) {
+ if (unlikely(!sge->length))
continue;
- sge_info->total_len += wr->sg_list[i].length;
- sge_info->valid_num++;
+ len += sge->length;
+ cnt++;
- if (wr->send_flags & IBV_SEND_INLINE &&
- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP)
+ if (flag)
continue;
- /* No inner sge in UD wqe */
- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE &&
- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) {
- set_data_seg_v2(dseg, wr->sg_list + i);
+ if (cnt <= HNS_ROCE_SGE_IN_WQE) {
+ set_data_seg_v2(dseg, sge);
dseg++;
} else {
- dseg = get_send_sge_ex(qp, sge_info->start_idx &
- (qp->ex_sge.sge_cnt - 1));
- set_data_seg_v2(dseg, wr->sg_list + i);
- sge_info->start_idx++;
+ dseg = get_send_sge_ex(qp, index & mask);
+ set_data_seg_v2(dseg, sge);
+ index++;
}
}
+
+ sge_info->start_idx = index;
+ sge_info->valid_num = cnt;
+ sge_info->total_len = len;
+}
+
+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ struct hns_roce_sge_info *sge_info)
+{
+ int flag = wr->send_flags & IBV_SEND_INLINE;
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
+ uint32_t index = sge_info->start_idx;
+ struct ibv_sge *sge = wr->sg_list;
+ uint32_t len = 0;
+ uint32_t cnt = 0;
+ int i;
+
+ for (i = 0; i < wr->num_sge; i++, sge++) {
+ if (unlikely(!sge->length))
+ continue;
+
+ len += sge->length;
+ cnt++;
+
+ if (flag)
+ continue;
+
+ /* No inner sge in UD wqe */
+ dseg = get_send_sge_ex(qp, index & mask);
+ set_data_seg_v2(dseg, sge);
+ index++;
+ }
+
+ sge_info->start_idx = index;
+ sge_info->valid_num = cnt;
+ sge_info->total_len = len;
}
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
UD_SQ_WQE_MSG_START_SGE_IDX_S,
sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
wqe += sizeof(struct hns_roce_rc_sq_wqe);
dseg = wqe;
- set_sge(dseg, qp, wr, sge_info);
+ set_rc_sge(dseg, qp, wr, sge_info);
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
--
2.30.0

View File

@ -1,258 +0,0 @@
From 1ea1524950b8bc4e4dfe06865e1e5c47a657b6e4 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galpress@amazon.com>
Date: Sun, 6 Jun 2021 14:48:07 +0300
Subject: verbs: Add generic logging API
A debug print mechanism is useful when debugging application failures.
This patch adds a generic API that can be used by all providers and
replaces provider-specific counterparts.
The debug messages are controlled through an environment variable named
VERBS_LOG_LEVEL, where the value indicates which prints should be
enabled:
enum {
VERBS_LOG_LEVEL_NONE,
VERBS_LOG_ERR,
VERBS_LOG_WARN,
VERBS_LOG_INFO,
VERBS_LOG_DEBUG,
};
For example, to enable prints with level warn or higher, VERBS_LOG_LEVEL
shall be set to 2.
The output shall be written to the file provided in the VERBS_LOG_FILE
environment variable. When the library is compiled in debug mode and no
file is provided the output shall be written to stderr.
For data-path flows, where the overhead of the additional if statement
matters, the verbs_*_datapath() macros can be used, which will be
compiled out when the library is compiled for release.
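For illustration, a provider call site could look like the sketch below; "vctx" and "idx" are invented for the example, only the macros themselves come from this patch. Running the application with VERBS_LOG_LEVEL=4 and VERBS_LOG_FILE=/tmp/verbs.log would then capture both prints in a debug build:

    /* Minimal usage sketch -- "vctx" (a struct verbs_context *) and
     * "idx" are hypothetical provider-side variables.
     */
    verbs_warn(vctx, "falling back to the slow path\n");     /* level >= 2 */
    verbs_debug_datapath(vctx, "posted wqe idx %u\n", idx);  /* level >= 4,
                                     compiled out in release builds */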
Signed-off-by: Gal Pressman <galpress@amazon.com>
---
Documentation/libibverbs.md | 18 ++++++++++
buildlib/RDMA_BuildType.cmake | 4 +++
libibverbs/driver.h | 50 +++++++++++++++++++++++++++
libibverbs/init.c | 65 +++++++++++++++++++++++++++++++++++
libibverbs/libibverbs.map.in | 1 +
5 files changed, 138 insertions(+)
diff --git a/Documentation/libibverbs.md b/Documentation/libibverbs.md
index cbe076e..980f354 100644
--- a/Documentation/libibverbs.md
+++ b/Documentation/libibverbs.md
@@ -56,3 +56,21 @@ need to increase this limit. This is usually done for ordinary users
via the file /etc/security/limits.conf. More configuration may be
necessary if you are logging in via OpenSSH and your sshd is
configured to use privilege separation.
+
+# Debugging
+
+### Enabling debug prints
+
+Library and providers debug prints can be enabled using the `VERBS_LOG_LEVEL`
+environment variable, the output shall be written to the file provided in the
+`VERBS_LOG_FILE` environment variable. When the library is compiled in debug
+mode and no file is provided the output will be written to stderr.
+
+Note: some of the debug prints are only available when the library is compiled
+in debug mode.
+
+The following table describes the expected behavior when VERBS_LOG_LEVEL is set:
+| | Release | Debug |
+|-----------------|---------------------------------|------------------------------------------------|
+| Regular prints | Output to VERBS_LOG_FILE if set | Output to VERBS_LOG_FILE, or stderr if not set |
+| Datapath prints | Compiled out, no output | Output to VERBS_LOG_FILE, or stderr if not set |
diff --git a/buildlib/RDMA_BuildType.cmake b/buildlib/RDMA_BuildType.cmake
index 17206f5..7a4f6a4 100644
--- a/buildlib/RDMA_BuildType.cmake
+++ b/buildlib/RDMA_BuildType.cmake
@@ -39,4 +39,8 @@ function(RDMA_BuildType)
CACHE STRING "Default flags for RelWithDebInfo configuration" FORCE)
endif()
endforeach()
+
+ if (CMAKE_BUILD_TYPE STREQUAL Debug OR CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo)
+ add_definitions("-DVERBS_DEBUG")
+ endif()
endfunction()
diff --git a/libibverbs/driver.h b/libibverbs/driver.h
index 926023b..bdb1aa4 100644
--- a/libibverbs/driver.h
+++ b/libibverbs/driver.h
@@ -49,6 +49,56 @@
struct verbs_device;
+enum {
+ VERBS_LOG_LEVEL_NONE,
+ VERBS_LOG_ERR,
+ VERBS_LOG_WARN,
+ VERBS_LOG_INFO,
+ VERBS_LOG_DEBUG,
+};
+
+void __verbs_log(struct verbs_context *ctx, uint32_t level,
+ const char *fmt, ...);
+
+#define verbs_log(ctx, level, format, arg...) \
+do { \
+ int tmp = errno; \
+ __verbs_log(ctx, level, "%s: %s:%d: " format, \
+ (ctx)->context.device->name, __func__, __LINE__, ##arg); \
+ errno = tmp; \
+} while (0)
+
+#define verbs_debug(ctx, format, arg...) \
+ verbs_log(ctx, VERBS_LOG_DEBUG, format, ##arg)
+
+#define verbs_info(ctx, format, arg...) \
+ verbs_log(ctx, VERBS_LOG_INFO, format, ##arg)
+
+#define verbs_warn(ctx, format, arg...) \
+ verbs_log(ctx, VERBS_LOG_WARN, format, ##arg)
+
+#define verbs_err(ctx, format, arg...) \
+ verbs_log(ctx, VERBS_LOG_ERR, format, ##arg)
+
+#ifdef VERBS_DEBUG
+#define verbs_log_datapath(ctx, level, format, arg...) \
+ verbs_log(ctx, level, format, ##arg)
+#else
+#define verbs_log_datapath(ctx, level, format, arg...) {}
+#endif
+
+#define verbs_debug_datapath(ctx, format, arg...) \
+ verbs_log_datapath(ctx, VERBS_LOG_DEBUG, format, ##arg)
+
+#define verbs_info_datapath(ctx, format, arg...) \
+ verbs_log_datapath(ctx, VERBS_LOG_INFO, format, ##arg)
+
+#define verbs_warn_datapath(ctx, format, arg...) \
+ verbs_log_datapath(ctx, VERBS_LOG_WARN, format, ##arg)
+
+#define verbs_err_datapath(ctx, format, arg...) \
+ verbs_log_datapath(ctx, VERBS_LOG_ERR, format, ##arg)
+
enum verbs_xrcd_mask {
VERBS_XRCD_HANDLE = 1 << 0,
VERBS_XRCD_RESERVED = 1 << 1
diff --git a/libibverbs/init.c b/libibverbs/init.c
index f5340ea..52b166a 100644
--- a/libibverbs/init.c
+++ b/libibverbs/init.c
@@ -36,6 +36,7 @@
#include <stdlib.h>
#include <string.h>
#include <glob.h>
+#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
@@ -52,11 +53,30 @@
#include <rdma/rdma_netlink.h>
#include <util/util.h>
+#include "driver.h"
#include "ibverbs.h"
#include <infiniband/cmd_write.h>
int abi_ver;
+static uint32_t verbs_log_level;
+static FILE *verbs_log_fp;
+
+__attribute__((format(printf, 3, 4)))
+void __verbs_log(struct verbs_context *ctx, uint32_t level,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ if (level <= verbs_log_level) {
+ int tmp = errno;
+ va_start(args, fmt);
+ vfprintf(verbs_log_fp, fmt, args);
+ va_end(args);
+ errno = tmp;
+ }
+}
+
struct ibv_driver {
struct list_node entry;
const struct verbs_device_ops *ops;
@@ -600,6 +620,49 @@ out:
return num_devices;
}
+static void verbs_set_log_level(void)
+{
+ char *env;
+
+ env = getenv("VERBS_LOG_LEVEL");
+ if (env)
+ verbs_log_level = strtol(env, NULL, 0);
+}
+
+/*
+ * Fallback in case log file is not provided or can't be opened.
+ * Release mode: disable debug prints.
+ * Debug mode: Use stderr instead of a file.
+ */
+static void verbs_log_file_fallback(void)
+{
+#ifdef VERBS_DEBUG
+ verbs_log_fp = stderr;
+#else
+ verbs_log_level = VERBS_LOG_LEVEL_NONE;
+#endif
+}
+
+static void verbs_set_log_file(void)
+{
+ char *env;
+
+ if (verbs_log_level == VERBS_LOG_LEVEL_NONE)
+ return;
+
+ env = getenv("VERBS_LOG_FILE");
+ if (!env) {
+ verbs_log_file_fallback();
+ return;
+ }
+
+ verbs_log_fp = fopen(env, "aw+");
+ if (!verbs_log_fp) {
+ verbs_log_file_fallback();
+ return;
+ }
+}
+
int ibverbs_init(void)
{
char *env_value;
@@ -621,6 +684,8 @@ int ibverbs_init(void)
return -errno;
check_memlock_limit();
+ verbs_set_log_level();
+ verbs_set_log_file();
return 0;
}
diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in
index 7c0fb6a..905f58f 100644
--- a/libibverbs/libibverbs.map.in
+++ b/libibverbs/libibverbs.map.in
@@ -167,6 +167,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ {
global:
/* These historical symbols are now private to libibverbs */
__ioctl_final_num_attrs;
+ __verbs_log;
_verbs_init_and_alloc_context;
execute_ioctl;
ibv_cmd_advise_mr;
--
2.27.0

View File

@ -1,164 +0,0 @@
From 7c9a7a5848d19b792d1b108da55fa48611142a9b Mon Sep 17 00:00:00 2001
From: Gal Pressman <galpress@amazon.com>
Date: Tue, 29 Jun 2021 10:43:29 +0300
Subject: libhns: Use the verbs logging API instead of printf/fprintf
Use the generic verbs logging API instead of calling printf/fprintf
directly.
This means that by default the prints will no longer be seen, but can be
enabled by setting VERBS_LOG_LEVEL appropriately.
Signed-off-by: Gal Pressman <galpress@amazon.com>
---
providers/hns/hns_roce_u_hw_v1.c | 34 +++++++++++++++++++++-----------
providers/hns/hns_roce_u_hw_v2.c | 4 ++--
providers/hns/hns_roce_u_verbs.c | 6 ++++--
3 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
index 279c9b0..6e107af 100644
--- a/providers/hns/hns_roce_u_hw_v1.c
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -108,7 +108,6 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe,
struct ibv_wc *wc)
{
- fprintf(stderr, PFX "error cqe!\n");
switch (roce_get_field(cqe->cqe_byte_4,
CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) &
@@ -176,7 +175,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq)
static void *get_recv_wqe(struct hns_roce_qp *qp, int n)
{
if ((n < 0) || (n > qp->rq.wqe_cnt)) {
- printf("rq wqe index:%d,rq wqe cnt:%d\r\n", n, qp->rq.wqe_cnt);
+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
+ "rq wqe index:%d,rq wqe cnt:%d\r\n", n,
+ qp->rq.wqe_cnt);
return NULL;
}
@@ -186,7 +187,9 @@ static void *get_recv_wqe(struct hns_roce_qp *qp, int n)
static void *get_send_wqe(struct hns_roce_qp *qp, int n)
{
if ((n < 0) || (n > qp->sq.wqe_cnt)) {
- printf("sq wqe index:%d,sq wqe cnt:%d\r\n", n, qp->sq.wqe_cnt);
+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
+ "sq wqe index:%d,sq wqe cnt:%d\r\n", n,
+ qp->sq.wqe_cnt);
return NULL;
}
@@ -207,8 +210,9 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
cur = wq->head - wq->tail;
pthread_spin_unlock(&cq->lock);
- printf("wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n",
- wq->head, wq->tail, wq->max_post, nreq);
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
+ "wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n",
+ wq->head, wq->tail, wq->max_post, nreq);
return cur + nreq >= wq->max_post;
}
@@ -221,7 +225,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
if (ctx->qp_table[tind].refcnt) {
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
} else {
- printf("hns_roce_find_qp fail!\n");
+ verbs_err(&ctx->ibv_ctx, "hns_roce_find_qp fail!\n");
return NULL;
}
}
@@ -273,7 +277,8 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
*cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context),
qpn & 0xffffff);
if (!*cur_qp) {
- fprintf(stderr, PFX "can't find qp!\n");
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
+ PFX "can't find qp!\n");
return CQ_POLL_ERR;
}
}
@@ -312,6 +317,8 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
if (roce_get_field(cqe->cqe_byte_4,
CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) {
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
+ PFX "error cqe!\n");
hns_roce_handle_error_cqe(cqe, wc);
return CQ_OK;
}
@@ -475,8 +482,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
if (wr->num_sge > qp->sq.max_gs) {
ret = -1;
*bad_wr = wr;
- printf("wr->num_sge(<=%d) = %d, check failed!\r\n",
- qp->sq.max_gs, wr->num_sge);
+ verbs_err(verbs_get_ctx(ibvqp->context),
+ "wr->num_sge(<=%d) = %d, check failed!\r\n",
+ qp->sq.max_gs, wr->num_sge);
goto out;
}
@@ -544,8 +552,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
if (le32toh(ctrl->msg_length) > qp->max_inline_data) {
ret = -1;
*bad_wr = wr;
- printf("inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n",
- wr->send_flags, ctrl->msg_length);
+ verbs_err(verbs_get_ctx(ibvqp->context),
+ "inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n",
+ wr->send_flags, ctrl->msg_length);
return ret;
}
@@ -650,7 +659,8 @@ static int hns_roce_u_v1_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
if (!ret && (attr_mask & IBV_QP_PORT)) {
hr_qp->port_num = attr->port_num;
- printf("hr_qp->port_num= 0x%x\n", hr_qp->port_num);
+ verbs_err(verbs_get_ctx(qp->context), "hr_qp->port_num= 0x%x\n",
+ hr_qp->port_num);
}
hr_qp->sl = attr->ah_attr.sl;
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4c21720..d4b76b5 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -629,8 +629,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc, opcode);
if (ret) {
- fprintf(stderr,
- PFX "failed to handle recv inline wqe!\n");
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
+ PFX "failed to handle recv inline wqe!\n");
return ret;
}
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 2a9e880..8840a9d 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -164,12 +164,14 @@ struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
struct ib_uverbs_reg_mr_resp resp;
if (!addr) {
- fprintf(stderr, "2nd parm addr is NULL!\n");
+ verbs_err(verbs_get_ctx(pd->context),
+ "2nd parm addr is NULL!\n");
return NULL;
}
if (!length) {
- fprintf(stderr, "3st parm length is 0!\n");
+ verbs_err(verbs_get_ctx(pd->context),
+ "3st parm length is 0!\n");
return NULL;
}
--
2.27.0

View File

@ -1,34 +0,0 @@
From 4780e0a4c8cf2112425d04b939825a30603d87e6 Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Tue, 9 Nov 2021 20:41:03 +0800
Subject: libhns: The function declaration should be the same as the definition
The parameter names should be the same when the function is declared and
defined.
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 21a5a6b..a5aa469 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -372,9 +372,9 @@ int hns_roce_u_free_pd(struct ibv_pd *pd);
struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
uint64_t hca_va, int access);
-int hns_roce_u_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
+int hns_roce_u_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
void *addr, size_t length, int access);
-int hns_roce_u_dereg_mr(struct verbs_mr *mr);
+int hns_roce_u_dereg_mr(struct verbs_mr *vmr);
struct ibv_mw *hns_roce_u_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
int hns_roce_u_dealloc_mw(struct ibv_mw *mw);
--
2.27.0

View File

@ -1,40 +0,0 @@
From 46c810472a1a6e3e093c21b6bcd43af0a0eda10b Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Tue, 9 Nov 2021 20:41:02 +0800
Subject: libhns: The content of the header file should be protected with
#define
Header files should be protected with #define to prevent repeated
inclusion.
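The pattern being applied is the standard include guard: the guard must open before any of the header's own #include directives and close at the very end of the file (a minimal sketch, with a placeholder guard name):

    #ifndef _EXAMPLE_H
    #define _EXAMPLE_H

    #include <linux/types.h>

    /* ... declarations ... */

    #endif /* _EXAMPLE_H */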
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u_db.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
index 13df9b5..ca056c3 100644
--- a/providers/hns/hns_roce_u_db.h
+++ b/providers/hns/hns_roce_u_db.h
@@ -29,14 +29,14 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef _HNS_ROCE_U_DB_H
+#define _HNS_ROCE_U_DB_H
#include <linux/types.h>
#include <util/mmio.h>
#include "hns_roce_u.h"
-#ifndef _HNS_ROCE_U_DB_H
-#define _HNS_ROCE_U_DB_H
#define HNS_ROCE_WORD_NUM 2
--
2.27.0

View File

@ -1,124 +0,0 @@
From dc29ea131407fbbe93497059b61e3ef22a675df1 Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Tue, 9 Nov 2021 20:41:01 +0800
Subject: libhns: Fix wrong type of variables and fields
Some variables and fields should be of unsigned type instead of signed.
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u.h | 6 +++---
providers/hns/hns_roce_u_hw_v1.c | 6 +++---
providers/hns/hns_roce_u_hw_v2.c | 11 +++++------
3 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index a5aa469..92dc26c 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -101,7 +101,7 @@
#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
-#define hr_ilog32(n) ilog32((n) - 1)
+#define hr_ilog32(n) ilog32((unsigned int)(n) - 1)
enum {
HNS_ROCE_QP_TABLE_BITS = 8,
@@ -205,7 +205,7 @@ struct hns_roce_cq {
struct hns_roce_idx_que {
struct hns_roce_buf buf;
- int entry_shift;
+ unsigned int entry_shift;
unsigned long *bitmap;
int bitmap_cnt;
unsigned int head;
@@ -252,7 +252,7 @@ struct hns_roce_sge_info {
struct hns_roce_sge_ex {
int offset;
unsigned int sge_cnt;
- int sge_shift;
+ unsigned int sge_shift;
};
struct hns_roce_rinl_sge {
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
index 6e107af..838e004 100644
--- a/providers/hns/hns_roce_u_hw_v1.c
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -220,7 +220,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
uint32_t qpn)
{
- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
if (ctx->qp_table[tind].refcnt) {
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
@@ -232,7 +232,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
{
- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
if (!--ctx->qp_table[tind].refcnt)
free(ctx->qp_table[tind].table);
@@ -740,7 +740,7 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
struct ibv_recv_wr **bad_wr)
{
int ret = 0;
- int nreq;
+ unsigned int nreq;
struct ibv_sge *sg;
struct hns_roce_rc_rq_wqe *rq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibvqp);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index d4b76b5..d0df51a 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -248,7 +248,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
return srq->wqe_buf.buf + (n << srq->wqe_shift);
}
-static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
{
return idx_que->buf.buf + (n << idx_que->entry_shift);
}
@@ -352,7 +352,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
uint32_t qpn)
{
- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
if (ctx->qp_table[tind].refcnt)
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
@@ -982,9 +982,8 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
return ret;
}
-static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp,
- struct ibv_send_wr *wr, int nreq,
- struct hns_roce_sge_info *sge_info)
+static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ unsigned int nreq, struct hns_roce_sge_info *sge_info)
{
struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah);
struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
@@ -1140,7 +1139,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe,
}
static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
- int nreq, struct hns_roce_sge_info *sge_info)
+ unsigned int nreq, struct hns_roce_sge_info *sge_info)
{
struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
struct hns_roce_v2_wqe_data_seg *dseg;
--
2.27.0

View File

@ -1,30 +0,0 @@
From 031ccf570369d820dab067cf29fb17e338cd4b28 Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Tue, 9 Nov 2021 20:41:00 +0800
Subject: libhns: Fix wrong print format for unsigned type
Change the %d printf format to %u for unsigned int variables.
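A minimal standalone example of why the specifier matters: once an unsigned value exceeds INT_MAX, %d misinterprets the bit pattern (output assumes a common two's-complement platform):

    #include <stdio.h>

    int main(void)
    {
        unsigned int v = 0x80000001;

        printf("%d\n", v);  /* wrong specifier: typically prints -2147483647 */
        printf("%u\n", v);  /* correct: prints 2147483649 */
        return 0;
    }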
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u_verbs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 8840a9d..923c005 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -73,7 +73,7 @@ int hns_roce_u_query_device(struct ibv_context *context,
sub_minor = raw_fw_ver & 0xffff;
snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver),
- "%d.%d.%03d", major, minor, sub_minor);
+ "%u.%u.%03u", major, minor, sub_minor);
return 0;
}
--
2.27.0

View File

@ -1,33 +0,0 @@
From e451dbaff5f0dd1715b6411169e970021cd43f4f Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Tue, 9 Nov 2021 20:40:59 +0800
Subject: libhns: Remove redundant variable initialization
The variable owner_bit is assigned before it is referenced, so there is
no need to initialize it.
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u_hw_v2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index d0df51a..5fb6477 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1399,9 +1399,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
{
int nfreed = 0;
bool is_recv_cqe;
+ uint8_t owner_bit;
uint16_t wqe_index;
uint32_t prod_index;
- uint8_t owner_bit = 0;
struct hns_roce_v2_cqe *cqe, *dest;
struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
--
2.27.0

View File

@ -1,33 +0,0 @@
From 21d81f659d801230a1ccf1aadf9b1ecba5a3ccd8 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Tue, 9 Nov 2021 20:40:57 +0800
Subject: libhns: Remove unused macros
These macros used to serve a purpose, but they are no longer used and should be removed.
Fixes: 516b8d4e4ebe ("providers: Use the new match_device and allocate_device ops")
Fixes: 887b78c80224 ("libhns: Add initial main frame")
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 3b31ad3..9dc4905 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -41,9 +41,6 @@
static void hns_roce_free_context(struct ibv_context *ibctx);
-#define HID_LEN 15
-#define DEV_MATCH_LEN 128
-
#ifndef PCI_VENDOR_ID_HUAWEI
#define PCI_VENDOR_ID_HUAWEI 0x19E5
#endif
--
2.27.0

View File

@ -1,545 +0,0 @@
From 0851ae661c4fe4dd285c22c6acce462fc8004b8d Mon Sep 17 00:00:00 2001
From: Yixian Liu <liuyixian@huawei.com>
Date: Thu, 18 Nov 2021 22:46:10 +0800
Subject: libhns: Refactor the poll one interface
Mainly about:
1. Separate the differences between various objects (such as sq, rq, srq)
into functions.
2. Optimize function names, variable names, and comments to increase code
readability.
3. Use a map instead of a switch branch to simplify the code.
Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 373 +++++++++++++++----------------
providers/hns/hns_roce_u_hw_v2.h | 10 +-
2 files changed, 189 insertions(+), 194 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 5fb6477..1b4e91b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -154,59 +154,37 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr,
return 0;
}
-static void hns_roce_v2_handle_error_cqe(struct hns_roce_v2_cqe *cqe,
- struct ibv_wc *wc)
-{
- unsigned int status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
- CQE_BYTE_4_STATUS_S);
- unsigned int cqe_status = status & HNS_ROCE_V2_CQE_STATUS_MASK;
+static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ uint8_t status)
+{
+ static const struct {
+ unsigned int cqe_status;
+ enum ibv_wc_status wc_status;
+ } map[] = {
+ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
+ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
+ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
+ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR },
+ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR },
+ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR },
+ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR },
+ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR },
+ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
+ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
+ };
- switch (cqe_status) {
- case HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR:
- wc->status = IBV_WC_LOC_LEN_ERR;
- break;
- case HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR:
- wc->status = IBV_WC_LOC_QP_OP_ERR;
- break;
- case HNS_ROCE_V2_CQE_LOCAL_PROT_ERR:
- wc->status = IBV_WC_LOC_PROT_ERR;
- break;
- case HNS_ROCE_V2_CQE_WR_FLUSH_ERR:
- wc->status = IBV_WC_WR_FLUSH_ERR;
- break;
- case HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR:
- wc->status = IBV_WC_MW_BIND_ERR;
- break;
- case HNS_ROCE_V2_CQE_BAD_RESP_ERR:
- wc->status = IBV_WC_BAD_RESP_ERR;
- break;
- case HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR:
- wc->status = IBV_WC_LOC_ACCESS_ERR;
- break;
- case HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR:
- wc->status = IBV_WC_REM_INV_REQ_ERR;
- break;
- case HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR:
- wc->status = IBV_WC_REM_ACCESS_ERR;
- break;
- case HNS_ROCE_V2_CQE_REMOTE_OP_ERR:
- wc->status = IBV_WC_REM_OP_ERR;
- break;
- case HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR:
- wc->status = IBV_WC_RETRY_EXC_ERR;
- break;
- case HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR:
- wc->status = IBV_WC_RNR_RETRY_EXC_ERR;
- break;
- case HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR:
- wc->status = IBV_WC_REM_ABORT_ERR;
- break;
- case HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR:
- wc->status = IBV_WC_REM_INV_RD_REQ_ERR;
- break;
- default:
- wc->status = IBV_WC_GENERAL_ERR;
- break;
+ int i;
+
+ wc->status = IBV_WC_GENERAL_ERR;
+ for (i = 0; i < ARRAY_SIZE(map); i++) {
+ if (status == map[i].cqe_status) {
+ wc->status = map[i].wc_status;
+ break;
+ }
}
}
@@ -268,6 +246,27 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
pthread_spin_unlock(&srq->lock);
}
+static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_context *ctx,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_srq **srq)
+{
+ uint32_t srqn;
+
+ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) {
+ srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M,
+ CQE_BYTE_12_XRC_SRQN_S);
+
+ *srq = hns_roce_find_srq(ctx, srqn);
+ if (!*srq)
+ return -EINVAL;
+ } else if (hr_qp->verbs_qp.qp.srq) {
+ *srq = to_hr_srq(hr_qp->verbs_qp.qp.srq);
+ }
+
+ return 0;
+}
+
static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
struct hns_roce_cq *cq)
{
@@ -332,7 +331,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
hns_roce_write512(qp->sq.db_reg, wqe);
}
-static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
+static void update_cq_db(struct hns_roce_context *ctx,
struct hns_roce_cq *cq)
{
struct hns_roce_db cq_db = {};
@@ -378,19 +377,17 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
int attr_mask);
-static int hns_roce_flush_cqe(struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
+static int hns_roce_flush_cqe(struct hns_roce_qp *hr_qp, uint8_t status)
{
struct ibv_qp_attr attr;
int attr_mask;
- if ((wc->status != IBV_WC_SUCCESS) &&
- (wc->status != IBV_WC_WR_FLUSH_ERR)) {
+ if (status != HNS_ROCE_V2_CQE_WR_FLUSH_ERR) {
attr_mask = IBV_QP_STATE;
attr.qp_state = IBV_QPS_ERR;
- hns_roce_u_v2_modify_qp(&(*cur_qp)->verbs_qp.qp, &attr,
- attr_mask);
+ hns_roce_u_v2_modify_qp(&hr_qp->verbs_qp.qp, &attr, attr_mask);
- (*cur_qp)->verbs_qp.qp.state = IBV_QPS_ERR;
+ hr_qp->verbs_qp.qp.state = IBV_QPS_ERR;
}
return V2_CQ_OK;
@@ -409,41 +406,6 @@ static const unsigned int wc_send_op_map[] = {
[HNS_ROCE_SQ_OP_BIND_MW] = IBV_WC_BIND_MW,
};
-static void hns_roce_v2_get_opcode_from_sender(struct hns_roce_v2_cqe *cqe,
- struct ibv_wc *wc)
-{
- uint32_t opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
- CQE_BYTE_4_OPCODE_S);
-
- switch (opcode) {
- case HNS_ROCE_SQ_OP_SEND:
- case HNS_ROCE_SQ_OP_SEND_WITH_INV:
- case HNS_ROCE_SQ_OP_RDMA_WRITE:
- case HNS_ROCE_SQ_OP_BIND_MW:
- wc->wc_flags = 0;
- break;
- case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
- case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
- wc->wc_flags = IBV_WC_WITH_IMM;
- break;
- case HNS_ROCE_SQ_OP_LOCAL_INV:
- wc->wc_flags = IBV_WC_WITH_INV;
- break;
- case HNS_ROCE_SQ_OP_RDMA_READ:
- case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP:
- case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD:
- wc->wc_flags = 0;
- wc->byte_len = le32toh(cqe->byte_cnt);
- break;
- default:
- wc->status = IBV_WC_GENERAL_ERR;
- wc->wc_flags = 0;
- return;
- }
-
- wc->opcode = wc_send_op_map[opcode];
-}
-
static const unsigned int wc_rcv_op_map[] = {
[HNS_ROCE_RECV_OP_RDMA_WRITE_IMM] = IBV_WC_RECV_RDMA_WITH_IMM,
[HNS_ROCE_RECV_OP_SEND] = IBV_WC_RECV,
@@ -451,9 +413,8 @@ static const unsigned int wc_rcv_op_map[] = {
[HNS_ROCE_RECV_OP_SEND_WITH_INV] = IBV_WC_RECV,
};
-static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe,
- struct ibv_wc *wc,
- uint32_t opcode)
+static void get_opcode_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ uint32_t opcode)
{
switch (opcode) {
case HNS_ROCE_RECV_OP_SEND:
@@ -476,9 +437,8 @@ static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe,
wc->opcode = wc_rcv_op_map[opcode];
}
-static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
- struct hns_roce_qp **cur_qp,
- struct ibv_wc *wc, uint32_t opcode)
+static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ struct hns_roce_qp **cur_qp, uint32_t opcode)
{
if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) &&
(opcode == HNS_ROCE_RECV_OP_SEND ||
@@ -521,26 +481,117 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
return V2_CQ_OK;
}
+static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc)
+{
+ wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M,
+ CQE_BYTE_32_SL_S);
+ wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M,
+ CQE_BYTE_32_RMT_QPN_S);
+ wc->slid = 0;
+ wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ?
+ IBV_WC_GRH : 0;
+ wc->pkey_index = 0;
+}
+
+static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ struct hns_roce_srq *srq)
+{
+ uint32_t wqe_idx;
+
+ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
+ CQE_BYTE_4_WQE_IDX_S);
+ wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
+ hns_roce_free_srq_wqe(srq, wqe_idx);
+}
+
+static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ struct hns_roce_qp *hr_qp, uint8_t opcode)
+{
+ struct hns_roce_wq *wq;
+ int ret;
+
+ wq = &hr_qp->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+
+ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
+ parse_for_ud_qp(cqe, wc);
+
+ ret = handle_recv_inl_wqe(cqe, wc, &hr_qp, opcode);
+ if (ret) {
+ verbs_err(verbs_get_ctx(hr_qp->verbs_qp.qp.context),
+ PFX "failed to handle recv inline wqe!\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ struct hns_roce_qp *hr_qp, uint8_t opcode)
+{
+ struct hns_roce_wq *wq;
+ uint32_t wqe_idx;
+
+ wq = &hr_qp->sq;
+ /*
+ * in case of signalling, the tail pointer needs to be updated
+ * according to the wqe idx in the current cqe first
+ */
+ if (hr_qp->sq_signal_bits) {
+ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
+ CQE_BYTE_4_WQE_IDX_S);
+ /* get the processed wqes num since last signalling */
+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
+ }
+ /* write the wr_id of wq into the wc */
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+
+ switch (opcode) {
+ case HNS_ROCE_SQ_OP_SEND:
+ case HNS_ROCE_SQ_OP_SEND_WITH_INV:
+ case HNS_ROCE_SQ_OP_RDMA_WRITE:
+ case HNS_ROCE_SQ_OP_BIND_MW:
+ wc->wc_flags = 0;
+ break;
+ case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
+ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
+ wc->wc_flags = IBV_WC_WITH_IMM;
+ break;
+ case HNS_ROCE_SQ_OP_LOCAL_INV:
+ wc->wc_flags = IBV_WC_WITH_INV;
+ break;
+ case HNS_ROCE_SQ_OP_RDMA_READ:
+ case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP:
+ case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD:
+ wc->wc_flags = 0;
+ wc->byte_len = le32toh(cqe->byte_cnt);
+ break;
+ default:
+ wc->status = IBV_WC_GENERAL_ERR;
+ wc->wc_flags = 0;
+ return;
+ }
+
+ wc->opcode = wc_send_op_map[opcode];
+}
+
static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
{
struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
struct hns_roce_srq *srq = NULL;
- struct hns_roce_wq *wq = NULL;
struct hns_roce_v2_cqe *cqe;
- uint16_t wqe_ctr;
- uint32_t opcode;
- uint32_t srqn;
+ uint8_t opcode;
+ uint8_t status;
uint32_t qpn;
- int is_send;
- int ret;
+ bool is_send;
- /* According to CI, find the relative cqe */
cqe = next_cqe_sw_v2(cq);
if (!cqe)
return V2_CQ_EMPTY;
- /* Get the next cqe, CI will be added gradually */
++cq->cons_index;
udma_from_device_barrier();
@@ -548,102 +599,48 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M,
CQE_BYTE_16_LCL_QPN_S);
- is_send = (roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) ==
- HNS_ROCE_V2_CQE_IS_SQ);
-
- /* if qp is zero, it will not get the correct qpn */
+ /* if cur qp is null, then could not get the correct qpn */
if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) {
*cur_qp = hns_roce_v2_find_qp(ctx, qpn);
if (!*cur_qp)
return V2_CQ_POLL_ERR;
}
- wc->qp_num = qpn;
- if ((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) {
- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M,
- CQE_BYTE_12_XRC_SRQN_S);
+ status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
+ CQE_BYTE_4_STATUS_S);
+ opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
+ CQE_BYTE_4_OPCODE_S);
+ is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ;
+ if (is_send) {
+ parse_cqe_for_req(cqe, wc, *cur_qp, opcode);
+ } else {
+ wc->byte_len = le32toh(cqe->byte_cnt);
+ get_opcode_for_resp(cqe, wc, opcode);
- srq = hns_roce_find_srq(ctx, srqn);
- if (!srq)
+ if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq))
return V2_CQ_POLL_ERR;
- } else if ((*cur_qp)->verbs_qp.qp.srq) {
- srq = to_hr_srq((*cur_qp)->verbs_qp.qp.srq);
- }
- if (is_send) {
- wq = &(*cur_qp)->sq;
- /*
- * if sq_signal_bits is 1, the tail pointer first update to
- * the wqe corresponding the current cqe
- */
- if ((*cur_qp)->sq_signal_bits) {
- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4,
- CQE_BYTE_4_WQE_IDX_M,
- CQE_BYTE_4_WQE_IDX_S));
- /*
- * wq->tail will plus a positive number every time,
- * when wq->tail exceeds 32b, it is 0 and acc
- */
- wq->tail += (wqe_ctr - (uint16_t) wq->tail) &
- (wq->wqe_cnt - 1);
+ if (srq) {
+ parse_cqe_for_srq(cqe, wc, srq);
+ } else {
+ if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode))
+ return V2_CQ_POLL_ERR;
}
- /* write the wr_id of wq into the wc */
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- } else if (srq) {
- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4,
- CQE_BYTE_4_WQE_IDX_M,
- CQE_BYTE_4_WQE_IDX_S));
- wc->wr_id = srq->wrid[wqe_ctr & (srq->wqe_cnt - 1)];
- hns_roce_free_srq_wqe(srq, wqe_ctr);
- } else {
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
}
+ wc->qp_num = qpn;
+
/*
- * HW maintains wc status, set the err type and directly return, after
- * generated the incorrect CQE
+ * once a cqe in error status, the driver needs to help the HW to
+ * generated flushed cqes for all subsequent wqes
*/
- if (roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
- CQE_BYTE_4_STATUS_S) != HNS_ROCE_V2_CQE_SUCCESS) {
- hns_roce_v2_handle_error_cqe(cqe, wc);
- return hns_roce_flush_cqe(cur_qp, wc);
+ if (status != HNS_ROCE_V2_CQE_SUCCESS) {
+ handle_error_cqe(cqe, wc, status);
+ return hns_roce_flush_cqe(*cur_qp, status);
}
wc->status = IBV_WC_SUCCESS;
- /*
- * According to the opcode type of cqe, mark the opcode and other
- * information of wc
- */
- if (is_send) {
- hns_roce_v2_get_opcode_from_sender(cqe, wc);
- } else {
- /* Get opcode and flag in rq&srq */
- wc->byte_len = le32toh(cqe->byte_cnt);
- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
- CQE_BYTE_4_OPCODE_S) & HNS_ROCE_V2_CQE_OPCODE_MASK;
- hns_roce_v2_get_opcode_from_receiver(cqe, wc, opcode);
-
- ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc, opcode);
- if (ret) {
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
- PFX "failed to handle recv inline wqe!\n");
- return ret;
- }
-
- wc->sl = (uint8_t)roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M,
- CQE_BYTE_32_SL_S);
- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M,
- CQE_BYTE_32_RMT_QPN_S);
- wc->slid = 0;
- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ?
- IBV_WC_GRH : 0;
- wc->pkey_index = 0;
- }
-
return V2_CQ_OK;
}
@@ -668,7 +665,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
*cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M;
else
- hns_roce_v2_update_cq_cons_index(ctx, cq);
+ update_cq_db(ctx, cq);
}
pthread_spin_unlock(&cq->lock);
@@ -1438,7 +1435,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
if (nfreed) {
cq->cons_index += nfreed;
udma_to_device_barrier();
- hns_roce_v2_update_cq_cons_index(ctx, cq);
+ update_cq_db(ctx, cq);
}
}
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index af72cd7..51a1df4 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -33,7 +33,10 @@
#ifndef _HNS_ROCE_U_HW_V2_H
#define _HNS_ROCE_U_HW_V2_H
-#define HNS_ROCE_V2_CQE_IS_SQ 0
+enum {
+ CQE_FOR_SQ,
+ CQE_FOR_RQ,
+};
#define HNS_ROCE_V2_CQ_DB_REQ_SOL 1
#define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0
@@ -94,11 +97,6 @@ enum {
V2_CQ_POLL_ERR = -2,
};
-enum {
- HNS_ROCE_V2_CQE_STATUS_MASK = 0xff,
- HNS_ROCE_V2_CQE_OPCODE_MASK = 0x1f,
-};
-
enum {
HNS_ROCE_V2_CQE_SUCCESS = 0x00,
HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR = 0x01,
--
2.27.0

View File

@ -1,42 +0,0 @@
From 72f495e542c1c458e71fd6971f412edec41830e1 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 24 Nov 2021 19:03:54 +0800
Subject: libhns: hr_ilog32() should be represented by a function instead of a
macro
The compiler will check whether the arguments passed to the function are of the
correct type.
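A simplified illustration of the difference (the commented-out callers are hypothetical, not from the patch): the explicit cast inside the old macro downgrades or hides diagnostics about questionable arguments, whereas the function prototype makes the compiler apply the normal, checkable argument conversion:

    /* Old form: the cast hides questionable arguments from the compiler. */
    #define hr_ilog32_macro(n) ilog32((unsigned int)(n) - 1)

    /* New form: callers are type-checked against the prototype. */
    static inline unsigned int hr_ilog32(unsigned int count)
    {
        return ilog32(count - 1);
    }

    /* hr_ilog32(some_pointer);        -> hard diagnostic from the compiler
     * hr_ilog32_macro(some_pointer);  -> the cast weakens or silences it
     */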
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.h | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 92dc26c..c1ae1c9 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -101,8 +101,6 @@
#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
-#define hr_ilog32(n) ilog32((unsigned int)(n) - 1)
-
enum {
HNS_ROCE_QP_TABLE_BITS = 8,
HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS,
@@ -326,6 +324,11 @@ static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
return hr_hw_page_align(count << buf_shift);
}
+static inline unsigned int hr_ilog32(unsigned int count)
+{
+ return ilog32(count - 1);
+}
+
static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev)
{
return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device);
--
2.27.0

View File

@ -1,30 +0,0 @@
From 61911051eec0f984537c2762208b8ecbc875d5d3 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Fri, 26 Nov 2021 16:53:18 +0800
Subject: libhns: Fix the size setting error when copying CQE in clean cq()
The size of the CQE differs between hardware versions, so the driver
needs to specify the CQE size explicitly.
Fixes: 3546e6b69ac8 ("libhns: Add support for CQE in size of 64 Bytes")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 1b4e91b..b13b6dc 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1426,7 +1426,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
(prod_index + nfreed) & cq->ibv_cq.cqe);
owner_bit = roce_get_bit(dest->byte_4,
CQE_BYTE_4_OWNER_S);
- memcpy(dest, cqe, sizeof(*cqe));
+ memcpy(dest, cqe, cq->cqe_size);
roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S,
owner_bit);
}
--
2.27.0

View File

@ -1,53 +0,0 @@
From 8fbb85bae3fd2632da80e77d02bbbe73aac85f88 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Fri, 26 Nov 2021 17:55:32 +0800
Subject: libhns: Fix the problem that XRC does not need to create RQ
XRC QP does not require RQ, so RQ should not be created.
Fixes: 4ed874a5cf30 ("libhns: Add support for XRC for HIP09")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 923c005..557d075 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -774,12 +774,22 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
return 0;
}
+static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr)
+{
+ if (attr->qp_type == IBV_QPT_XRC_SEND ||
+ attr->qp_type == IBV_QPT_XRC_RECV || attr->srq)
+ return 0;
+
+ return 1;
+}
+
static int verify_qp_create_cap(struct hns_roce_context *ctx,
struct ibv_qp_init_attr_ex *attr)
{
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
struct ibv_qp_cap *cap = &attr->cap;
uint32_t min_wqe_num;
+ int has_rq;
if (!cap->max_send_wr && attr->qp_type != IBV_QPT_XRC_RECV)
return -EINVAL;
@@ -790,7 +800,8 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
cap->max_recv_sge > ctx->max_sge)
return -EINVAL;
- if (attr->srq) {
+ has_rq = hns_roce_qp_has_rq(attr);
+ if (!has_rq) {
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
}
--
2.27.0

View File

@ -1,45 +0,0 @@
From 29fd05367349c7909949206a13092031b689eca7 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Tue, 30 Nov 2021 20:46:14 +0800
Subject: libhns: Add vendor_err information for error WC
The ULP can get more CQ error information through verbs.
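On the consumer side the new field is read from the standard work completion structure; a minimal polling sketch (assumes an ibv_cq *cq created earlier, error handling trimmed):

    struct ibv_wc wc;

    while (ibv_poll_cq(cq, 1, &wc) > 0) {
        if (wc.status != IBV_WC_SUCCESS)
            fprintf(stderr, "wr 0x%llx failed: %s (vendor_err 0x%x)\n",
                    (unsigned long long)wc.wr_id,
                    ibv_wc_status_str(wc.status), wc.vendor_err);
    }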
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 3 +++
providers/hns/hns_roce_u_hw_v2.h | 3 +++
2 files changed, 6 insertions(+)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index b13b6dc..18399e9 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -186,6 +186,9 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
break;
}
}
+
+ wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M,
+ CQE_BYTE_16_SUB_STATUS_S);
}
static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 51a1df4..014cb8c 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -184,6 +184,9 @@ struct hns_roce_v2_cqe {
#define CQE_BYTE_16_LCL_QPN_S 0
#define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S)
+#define CQE_BYTE_16_SUB_STATUS_S 24
+#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S)
+
#define CQE_BYTE_28_SMAC_S 0
#define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S)
--
2.27.0

View File

@ -1,69 +0,0 @@
From 46548879b84e8c502198a549d82ec079ebc8b9a0 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Thu, 2 Dec 2021 21:44:26 +0800
Subject: libhns: Forcibly rewrite the inline flag of WQE
When a non-inline WR reuses a WQE that was last used for an inline WR,
the stale inline flag should be cleared.
Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE")
Fixes: 82fc508a6625 ("libhns: Add support for UD inline")
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 18399e9..4eaa929 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -876,8 +876,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
if (!check_inl_data_len(qp, sge_info->total_len))
return -EINVAL;
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S, 1);
-
if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) {
roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx,
UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0);
@@ -993,6 +991,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
!!(wr->send_flags & IBV_SEND_SIGNALED));
roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S,
!!(wr->send_flags & IBV_SEND_SOLICITED));
+ roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S,
+ !!(wr->send_flags & IBV_SEND_INLINE));
ret = check_ud_opcode(ud_sq_wqe, wr);
if (ret)
@@ -1044,8 +1044,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
dseg += sizeof(struct hns_roce_rc_sq_wqe);
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, 1);
-
if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) {
roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S,
0);
@@ -1150,13 +1148,13 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
return ret;
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
- (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0);
-
+ !!(wr->send_flags & IBV_SEND_SIGNALED));
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
- (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0);
-
+ !!(wr->send_flags & IBV_SEND_FENCE));
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S,
- (wr->send_flags & IBV_SEND_SOLICITED) ? 1 : 0);
+ !!(wr->send_flags & IBV_SEND_SOLICITED));
+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S,
+ !!(wr->send_flags & IBV_SEND_INLINE));
roce_set_field(rc_sq_wqe->byte_20,
RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
--
2.27.0

View File

@ -1,44 +0,0 @@
From 2194680136400d6a5f12298ff4993fa6f51c2e10 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Wed, 8 Dec 2021 19:03:56 +0800
Subject: libhns: Forcibly rewrite the strong-order flag of WQE
The Local Invalidate operation sets the SO (strong-order) flag; all other operations clear it.
Fixes: a9ae7e9bfb5d ("libhns: Add local invalidate MR support for hip08")
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4eaa929..cf871ab 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1143,10 +1143,6 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
struct hns_roce_v2_wqe_data_seg *dseg;
int ret;
- ret = check_rc_opcode(rc_sq_wqe, wr);
- if (ret)
- return ret;
-
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
!!(wr->send_flags & IBV_SEND_SIGNALED));
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
@@ -1155,6 +1151,11 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
!!(wr->send_flags & IBV_SEND_SOLICITED));
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S,
!!(wr->send_flags & IBV_SEND_INLINE));
+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0);
+
+ ret = check_rc_opcode(rc_sq_wqe, wr);
+ if (ret)
+ return ret;
roce_set_field(rc_sq_wqe->byte_20,
RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
--
2.27.0

View File

@ -1,32 +0,0 @@
From 2a2e3ece2ff801e8d8e4915a56fe3fff8399d6a0 Mon Sep 17 00:00:00 2001
From: Firas Jahjah <firasj@amazon.com>
Date: Tue, 28 Dec 2021 15:58:37 +0200
Subject: util: Fix mmio memcpy on ARM
The below commit added a new implementation of mmio_memcpy_x64() for
ARM which was broken. The destination buffer must be advanced so we
don't copy to the same 64 bytes.
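Reduced to a plain C loop, the shape of the bug and the fix look like this (a simplified sketch; the real code uses NEON uint64x2x4_t loads/stores rather than memcpy(), and bytecnt is assumed to be a non-zero multiple of 64, as the caller guarantees):

    #include <stdint.h>
    #include <string.h>

    static void copy_x64(uint8_t *dest, const uint8_t *src, size_t bytecnt)
    {
        do {
            memcpy(dest, src, 64);  /* stands in for the 64-byte NEON store */
            bytecnt -= 64;
            src += 64;
            dest += 64;  /* the missing advance: without it every chunk
                            overwrites the same 64 destination bytes */
        } while (bytecnt > 0);
    }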
Fixes: 159933c37 ("libhns: Add support for direct wqe")
Reviewed-by: Daniel Kranzdorf <dkkranzd@amazon.com>
Reviewed-by: Yossi Leybovich <sleybo@amazon.com>
Signed-off-by: Firas Jahjah <firasj@amazon.com>
---
util/mmio.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/util/mmio.h b/util/mmio.h
index 01d1455..5974058 100644
--- a/util/mmio.h
+++ b/util/mmio.h
@@ -225,6 +225,7 @@ static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
_mmio_memcpy_x64_64b(dest, src);
bytecnt -= sizeof(uint64x2x4_t);
src += sizeof(uint64x2x4_t);
+ dest += sizeof(uint64x2x4_t);
} while (bytecnt > 0);
}
--
2.27.0

View File

@ -1,550 +0,0 @@
From 532c4b6babe97e3023a049f1c6bd8a8e3ad95140 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Sat, 25 Dec 2021 17:42:55 +0800
Subject: libhns: Use new interfaces hr_reg_xxx() to operate the WQE field
Use hr_reg_xxx() to simplify the code that fills the WQE fields.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 170 ++++++++++------------------
providers/hns/hns_roce_u_hw_v2.h | 184 ++++++++++++++-----------------
2 files changed, 144 insertions(+), 210 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index cf871ab..0cff12b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -323,13 +323,10 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
/* All kinds of DirectWQE have the same header field layout */
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1);
- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M,
- RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl);
- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M,
- RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT);
- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
- RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
+ hr_reg_enable(rc_sq_wqe, RCWQE_FLAG);
+ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_L, qp->sl);
+ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT);
+ hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head);
hns_roce_write512(qp->sq.db_reg, wqe);
}
@@ -834,29 +831,15 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr,
tmp += wr->sg_list[i].length;
}
- roce_set_field(ud_sq_wqe->msg_len,
- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M,
- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S,
- *loc & 0xffff);
-
- roce_set_field(ud_sq_wqe->sge_num_pd,
- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M,
- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S,
- (*loc >> 16) & 0xff);
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_15_0, *loc & 0xffff);
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_23_16, (*loc >> 16) & 0xff);
tmp_data = *loc >> 24;
loc++;
tmp_data |= ((*loc & 0xffff) << 8);
- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx,
- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M,
- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S,
- tmp_data);
-
- roce_set_field(ud_sq_wqe->udpspn_rsv,
- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M,
- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S,
- *loc >> 16);
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_47_24, tmp_data);
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_63_48, *loc >> 16);
}
static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
@@ -877,13 +860,11 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
return -EINVAL;
if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) {
- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx,
- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0);
+ hr_reg_clear(ud_sq_wqe, UDWQE_INLINE_TYPE);
fill_ud_inn_inl_data(wr, ud_sq_wqe);
} else {
- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx,
- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 1);
+ hr_reg_enable(ud_sq_wqe, UDWQE_INLINE_TYPE);
ret = fill_ext_sge_inl_data(qp, wr, sge_info);
if (ret)
@@ -891,8 +872,7 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
sge_info->valid_num = sge_info->start_idx - sge_idx;
- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M,
- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num);
+ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num);
}
return 0;
@@ -919,8 +899,7 @@ static int check_ud_opcode(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
ud_sq_wqe->immtdata = get_immtdata(ib_op, wr);
- roce_set_field(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OPCODE_M,
- UD_SQ_WQE_OPCODE_S, to_hr_opcode(ib_op));
+ hr_reg_write(ud_sq_wqe, UDWQE_OPCODE, to_hr_opcode(ib_op));
return 0;
}
@@ -931,24 +910,12 @@ static int fill_ud_av(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
if (unlikely(ah->av.sl > MAX_SERVICE_LEVEL))
return EINVAL;
- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_SL_M,
- UD_SQ_WQE_SL_S, ah->av.sl);
-
- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_PD_M,
- UD_SQ_WQE_PD_S, to_hr_pd(ah->ibv_ah.pd)->pdn);
-
- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_TCLASS_M,
- UD_SQ_WQE_TCLASS_S, ah->av.tclass);
-
- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_HOPLIMIT_M,
- UD_SQ_WQE_HOPLIMIT_S, ah->av.hop_limit);
-
- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_FLOW_LABEL_M,
- UD_SQ_WQE_FLOW_LABEL_S, ah->av.flowlabel);
-
- roce_set_field(ud_sq_wqe->udpspn_rsv, UD_SQ_WQE_UDP_SPN_M,
- UD_SQ_WQE_UDP_SPN_S, ah->av.udp_sport);
-
+ hr_reg_write(ud_sq_wqe, UDWQE_SL, ah->av.sl);
+ hr_reg_write(ud_sq_wqe, UDWQE_PD, to_hr_pd(ah->ibv_ah.pd)->pdn);
+ hr_reg_write(ud_sq_wqe, UDWQE_TCLASS, ah->av.tclass);
+ hr_reg_write(ud_sq_wqe, UDWQE_HOPLIMIT, ah->av.hop_limit);
+ hr_reg_write(ud_sq_wqe, UDWQE_FLOW_LABEL, ah->av.flowlabel);
+ hr_reg_write(ud_sq_wqe, UDWQE_UDPSPN, ah->av.udp_sport);
memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN);
ud_sq_wqe->sgid_index = ah->av.gid_index;
memcpy(ud_sq_wqe->dgid, ah->av.dgid, HNS_ROCE_GID_SIZE);
@@ -962,17 +929,14 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
{
int ret = 0;
- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx,
- UD_SQ_WQE_MSG_START_SGE_IDX_M,
- UD_SQ_WQE_MSG_START_SGE_IDX_S,
- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
+ hr_reg_write(ud_sq_wqe, UDWQE_MSG_START_SGE_IDX,
+ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M,
- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num);
+ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num);
if (wr->send_flags & IBV_SEND_INLINE)
ret = set_ud_inl(qp, wr, ud_sq_wqe, sge_info);
@@ -987,12 +951,12 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
int ret = 0;
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_CQE_S,
- !!(wr->send_flags & IBV_SEND_SIGNALED));
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S,
- !!(wr->send_flags & IBV_SEND_SOLICITED));
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S,
- !!(wr->send_flags & IBV_SEND_INLINE));
+ hr_reg_write_bool(ud_sq_wqe, UDWQE_CQE,
+ !!(wr->send_flags & IBV_SEND_SIGNALED));
+ hr_reg_write_bool(ud_sq_wqe, UDWQE_SE,
+ !!(wr->send_flags & IBV_SEND_SOLICITED));
+ hr_reg_write_bool(ud_sq_wqe, UDWQE_INLINE,
+ !!(wr->send_flags & IBV_SEND_INLINE));
ret = check_ud_opcode(ud_sq_wqe, wr);
if (ret)
@@ -1001,8 +965,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
ud_sq_wqe->qkey = htole32(wr->wr.ud.remote_qkey & 0x80000000 ?
qp->qkey : wr->wr.ud.remote_qkey);
- roce_set_field(ud_sq_wqe->rsv_dqpn, UD_SQ_WQE_DQPN_M,
- UD_SQ_WQE_DQPN_S, wr->wr.ud.remote_qpn);
+ hr_reg_write(ud_sq_wqe, UDWQE_DQPN, wr->wr.ud.remote_qpn);
ret = fill_ud_av(ud_sq_wqe, ah);
if (ret)
@@ -1021,8 +984,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB)
udma_to_device_barrier();
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OWNER_S,
- ~((qp->sq.head + nreq) >> qp->sq.shift));
+ hr_reg_write_bool(wqe, RCWQE_OWNER,
+ !((qp->sq.head + nreq) & BIT(qp->sq.shift)));
return ret;
}
@@ -1045,8 +1008,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
dseg += sizeof(struct hns_roce_rc_sq_wqe);
if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) {
- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S,
- 0);
+ hr_reg_clear(rc_sq_wqe, RCWQE_INLINE_TYPE);
for (i = 0; i < wr->num_sge; i++) {
memcpy(dseg, (void *)(uintptr_t)(wr->sg_list[i].addr),
@@ -1054,8 +1016,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
dseg += wr->sg_list[i].length;
}
} else {
- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S,
- 1);
+ hr_reg_enable(rc_sq_wqe, RCWQE_INLINE_TYPE);
ret = fill_ext_sge_inl_data(qp, wr, sge_info);
if (ret)
@@ -1063,9 +1024,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
sge_info->valid_num = sge_info->start_idx - sge_idx;
- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M,
- RC_SQ_WQE_BYTE_16_SGE_NUM_S,
- sge_info->valid_num);
+ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num);
}
return 0;
@@ -1074,17 +1033,16 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
const struct ibv_send_wr *wr)
{
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_MW_TYPE_S,
- wr->bind_mw.mw->type - 1);
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_ATOMIC_S,
- (wr->bind_mw.bind_info.mw_access_flags &
- IBV_ACCESS_REMOTE_ATOMIC) ? 1 : 0);
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_READ_S,
- (wr->bind_mw.bind_info.mw_access_flags &
- IBV_ACCESS_REMOTE_READ) ? 1 : 0);
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_WRITE_S,
- (wr->bind_mw.bind_info.mw_access_flags &
- IBV_ACCESS_REMOTE_WRITE) ? 1 : 0);
+ unsigned int access = wr->bind_mw.bind_info.mw_access_flags;
+
+ hr_reg_write_bool(wqe, RCWQE_MW_TYPE, wr->bind_mw.mw->type - 1);
+ hr_reg_write_bool(wqe, RCWQE_MW_RA_EN,
+ !!(access & IBV_ACCESS_REMOTE_ATOMIC));
+ hr_reg_write_bool(wqe, RCWQE_MW_RR_EN,
+ !!(access & IBV_ACCESS_REMOTE_READ));
+ hr_reg_write_bool(wqe, RCWQE_MW_RW_EN,
+ !!(access & IBV_ACCESS_REMOTE_WRITE));
+
wqe->new_rkey = htole32(wr->bind_mw.rkey);
wqe->byte_16 = htole32(wr->bind_mw.bind_info.length &
HNS_ROCE_ADDRESS_MASK);
@@ -1117,7 +1075,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe,
wqe->va = htole64(wr->wr.atomic.remote_addr);
break;
case IBV_WR_LOCAL_INV:
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 1);
+ hr_reg_enable(wqe, RCWQE_SO);
/* fallthrough */
case IBV_WR_SEND_WITH_INV:
wqe->inv_key = htole32(wr->invalidate_rkey);
@@ -1130,8 +1088,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe,
break;
}
- roce_set_field(wqe->byte_4, RC_SQ_WQE_BYTE_4_OPCODE_M,
- RC_SQ_WQE_BYTE_4_OPCODE_S, to_hr_opcode(wr->opcode));
+ hr_reg_write(wqe, RCWQE_OPCODE, to_hr_opcode(wr->opcode));
return ret;
}
@@ -1143,24 +1100,22 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
struct hns_roce_v2_wqe_data_seg *dseg;
int ret;
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
- !!(wr->send_flags & IBV_SEND_SIGNALED));
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
- !!(wr->send_flags & IBV_SEND_FENCE));
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S,
- !!(wr->send_flags & IBV_SEND_SOLICITED));
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S,
- !!(wr->send_flags & IBV_SEND_INLINE));
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0);
+ hr_reg_write_bool(wqe, RCWQE_CQE,
+ !!(wr->send_flags & IBV_SEND_SIGNALED));
+ hr_reg_write_bool(wqe, RCWQE_FENCE,
+ !!(wr->send_flags & IBV_SEND_FENCE));
+ hr_reg_write_bool(wqe, RCWQE_SE,
+ !!(wr->send_flags & IBV_SEND_SOLICITED));
+ hr_reg_write_bool(wqe, RCWQE_INLINE,
+ !!(wr->send_flags & IBV_SEND_INLINE));
+ hr_reg_clear(wqe, RCWQE_SO);
ret = check_rc_opcode(rc_sq_wqe, wr);
if (ret)
return ret;
- roce_set_field(rc_sq_wqe->byte_20,
- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
+ hr_reg_write(rc_sq_wqe, RCWQE_MSG_START_SGE_IDX,
+ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
if (wr->opcode == IBV_WR_BIND_MW)
goto wqe_valid;
@@ -1172,8 +1127,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M,
- RC_SQ_WQE_BYTE_16_SGE_NUM_S, sge_info->valid_num);
+ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num);
if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD ||
wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
@@ -1196,8 +1150,8 @@ wqe_valid:
if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB)
udma_to_device_barrier();
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S,
- ~((qp->sq.head + nreq) >> qp->sq.shift));
+ hr_reg_write_bool(wqe, RCWQE_OWNER,
+ !((qp->sq.head + nreq) & BIT(qp->sq.shift)));
return 0;
}
@@ -1243,10 +1197,8 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
switch (ibvqp->qp_type) {
case IBV_QPT_XRC_SEND:
- roce_set_field(wqe->byte_16,
- RC_SQ_WQE_BYTE_16_XRC_SRQN_M,
- RC_SQ_WQE_BYTE_16_XRC_SRQN_S,
- wr->qp_type.xrc.remote_srqn);
+ hr_reg_write(wqe, RCWQE_XRC_SRQN,
+ wr->qp_type.xrc.remote_srqn);
SWITCH_FALLTHROUGH;
case IBV_QPT_RC:
ret = set_rc_wqe(wqe, qp, wr, nreq, &sge_info);
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 014cb8c..4330b7d 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -220,53 +220,44 @@ struct hns_roce_rc_sq_wqe {
__le64 va;
};
-#define RC_SQ_WQE_BYTE_4_OPCODE_S 0
-#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
-
-#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5
-#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
-
-#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13
-#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
-
-#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15
-#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
-
-#define RC_SQ_WQE_BYTE_4_OWNER_S 7
-
-#define RC_SQ_WQE_BYTE_4_CQE_S 8
-
-#define RC_SQ_WQE_BYTE_4_FENCE_S 9
-
-#define RC_SQ_WQE_BYTE_4_SO_S 10
-
-#define RC_SQ_WQE_BYTE_4_SE_S 11
-
-#define RC_SQ_WQE_BYTE_4_INLINE_S 12
-
-#define RC_SQ_WQE_BYTE_4_MW_TYPE_S 14
-
-#define RC_SQ_WQE_BYTE_4_ATOMIC_S 20
-
-#define RC_SQ_WQE_BYTE_4_RDMA_READ_S 21
-
-#define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22
-
-#define RC_SQ_WQE_BYTE_4_FLAG_S 31
-
-#define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0
-#define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \
- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S)
-
-#define RC_SQ_WQE_BYTE_16_SGE_NUM_S 24
-#define RC_SQ_WQE_BYTE_16_SGE_NUM_M \
- (((1UL << 8) - 1) << RC_SQ_WQE_BYTE_16_SGE_NUM_S)
-
-#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
-#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M \
- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S)
-
-#define RC_SQ_WQE_BYTE_20_INL_TYPE_S 31
+#define RCWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_rc_sq_wqe, h, l)
+
+#define RCWQE_OPCODE RCWQE_FIELD_LOC(4, 0)
+#define RCWQE_DB_SL_L RCWQE_FIELD_LOC(6, 5)
+#define RCWQE_SQPN_L RCWQE_FIELD_LOC(6, 5)
+#define RCWQE_OWNER RCWQE_FIELD_LOC(7, 7)
+#define RCWQE_CQE RCWQE_FIELD_LOC(8, 8)
+#define RCWQE_FENCE RCWQE_FIELD_LOC(9, 9)
+#define RCWQE_SO RCWQE_FIELD_LOC(10, 10)
+#define RCWQE_SE RCWQE_FIELD_LOC(11, 11)
+#define RCWQE_INLINE RCWQE_FIELD_LOC(12, 12)
+#define RCWQE_DB_SL_H RCWQE_FIELD_LOC(14, 13)
+#define RCWQE_WQE_IDX RCWQE_FIELD_LOC(30, 15)
+#define RCWQE_SQPN_H RCWQE_FIELD_LOC(30, 13)
+#define RCWQE_FLAG RCWQE_FIELD_LOC(31, 31)
+#define RCWQE_MSG_LEN RCWQE_FIELD_LOC(63, 32)
+#define RCWQE_INV_KEY_IMMTDATA RCWQE_FIELD_LOC(95, 64)
+#define RCWQE_XRC_SRQN RCWQE_FIELD_LOC(119, 96)
+#define RCWQE_SGE_NUM RCWQE_FIELD_LOC(127, 120)
+#define RCWQE_MSG_START_SGE_IDX RCWQE_FIELD_LOC(151, 128)
+#define RCWQE_REDUCE_CODE RCWQE_FIELD_LOC(158, 152)
+#define RCWQE_INLINE_TYPE RCWQE_FIELD_LOC(159, 159)
+#define RCWQE_RKEY RCWQE_FIELD_LOC(191, 160)
+#define RCWQE_VA_L RCWQE_FIELD_LOC(223, 192)
+#define RCWQE_VA_H RCWQE_FIELD_LOC(255, 224)
+#define RCWQE_LEN0 RCWQE_FIELD_LOC(287, 256)
+#define RCWQE_LKEY0 RCWQE_FIELD_LOC(319, 288)
+#define RCWQE_VA0_L RCWQE_FIELD_LOC(351, 320)
+#define RCWQE_VA0_H RCWQE_FIELD_LOC(383, 352)
+#define RCWQE_LEN1 RCWQE_FIELD_LOC(415, 384)
+#define RCWQE_LKEY1 RCWQE_FIELD_LOC(447, 416)
+#define RCWQE_VA1_L RCWQE_FIELD_LOC(479, 448)
+#define RCWQE_VA1_H RCWQE_FIELD_LOC(511, 480)
+
+#define RCWQE_MW_TYPE RCWQE_FIELD_LOC(256, 256)
+#define RCWQE_MW_RA_EN RCWQE_FIELD_LOC(258, 258)
+#define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
+#define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
struct hns_roce_v2_wqe_data_seg {
__le32 len;
@@ -323,60 +314,51 @@ struct hns_roce_ud_sq_wqe {
uint8_t dgid[HNS_ROCE_GID_SIZE];
};
-#define UD_SQ_WQE_OPCODE_S 0
-#define UD_SQ_WQE_OPCODE_M GENMASK(4, 0)
-
-#define UD_SQ_WQE_OWNER_S 7
-
-#define UD_SQ_WQE_CQE_S 8
-
-#define UD_SQ_WQE_SE_S 11
-
-#define UD_SQ_WQE_PD_S 0
-#define UD_SQ_WQE_PD_M GENMASK(23, 0)
-
-#define UD_SQ_WQE_SGE_NUM_S 24
-#define UD_SQ_WQE_SGE_NUM_M GENMASK(31, 24)
-
-#define UD_SQ_WQE_MSG_START_SGE_IDX_S 0
-#define UD_SQ_WQE_MSG_START_SGE_IDX_M GENMASK(23, 0)
-
-#define UD_SQ_WQE_UDP_SPN_S 16
-#define UD_SQ_WQE_UDP_SPN_M GENMASK(31, 16)
-
-#define UD_SQ_WQE_DQPN_S 0
-#define UD_SQ_WQE_DQPN_M GENMASK(23, 0)
-
-#define UD_SQ_WQE_VLAN_S 0
-#define UD_SQ_WQE_VLAN_M GENMASK(15, 0)
-
-#define UD_SQ_WQE_HOPLIMIT_S 16
-#define UD_SQ_WQE_HOPLIMIT_M GENMASK(23, 16)
-
-#define UD_SQ_WQE_TCLASS_S 24
-#define UD_SQ_WQE_TCLASS_M GENMASK(31, 24)
-
-#define UD_SQ_WQE_FLOW_LABEL_S 0
-#define UD_SQ_WQE_FLOW_LABEL_M GENMASK(19, 0)
-
-#define UD_SQ_WQE_SL_S 20
-#define UD_SQ_WQE_SL_M GENMASK(23, 20)
-
-#define UD_SQ_WQE_VLAN_EN_S 30
-
-#define UD_SQ_WQE_LBI_S 31
-
-#define UD_SQ_WQE_BYTE_4_INL_S 12
-#define UD_SQ_WQE_BYTE_20_INL_TYPE_S 31
-
-#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S 16
-#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M GENMASK(31, 16)
-#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S 24
-#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M GENMASK(31, 24)
-#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S 0
-#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M GENMASK(23, 0)
-#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S 0
-#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M GENMASK(15, 0)
+#define UDWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ud_sq_wqe, h, l)
+
+#define UDWQE_OPCODE UDWQE_FIELD_LOC(4, 0)
+#define UDWQE_DB_SL_L UDWQE_FIELD_LOC(6, 5)
+#define UDWQE_OWNER UDWQE_FIELD_LOC(7, 7)
+#define UDWQE_CQE UDWQE_FIELD_LOC(8, 8)
+#define UDWQE_RSVD1 UDWQE_FIELD_LOC(10, 9)
+#define UDWQE_SE UDWQE_FIELD_LOC(11, 11)
+#define UDWQE_INLINE UDWQE_FIELD_LOC(12, 12)
+#define UDWQE_DB_SL_H UDWQE_FIELD_LOC(14, 13)
+#define UDWQE_WQE_IDX UDWQE_FIELD_LOC(30, 15)
+#define UDWQE_FLAG UDWQE_FIELD_LOC(31, 31)
+#define UDWQE_MSG_LEN UDWQE_FIELD_LOC(63, 32)
+#define UDWQE_IMMTDATA UDWQE_FIELD_LOC(95, 64)
+#define UDWQE_PD UDWQE_FIELD_LOC(119, 96)
+#define UDWQE_SGE_NUM UDWQE_FIELD_LOC(127, 120)
+#define UDWQE_MSG_START_SGE_IDX UDWQE_FIELD_LOC(151, 128)
+#define UDWQE_RSVD3 UDWQE_FIELD_LOC(158, 152)
+#define UDWQE_INLINE_TYPE UDWQE_FIELD_LOC(159, 159)
+#define UDWQE_RSVD4 UDWQE_FIELD_LOC(175, 160)
+#define UDWQE_UDPSPN UDWQE_FIELD_LOC(191, 176)
+#define UDWQE_QKEY UDWQE_FIELD_LOC(223, 192)
+#define UDWQE_DQPN UDWQE_FIELD_LOC(247, 224)
+#define UDWQE_RSVD5 UDWQE_FIELD_LOC(255, 248)
+#define UDWQE_VLAN UDWQE_FIELD_LOC(271, 256)
+#define UDWQE_HOPLIMIT UDWQE_FIELD_LOC(279, 272)
+#define UDWQE_TCLASS UDWQE_FIELD_LOC(287, 280)
+#define UDWQE_FLOW_LABEL UDWQE_FIELD_LOC(307, 288)
+#define UDWQE_SL UDWQE_FIELD_LOC(311, 308)
+#define UDWQE_PORTN UDWQE_FIELD_LOC(314, 312)
+#define UDWQE_RSVD6 UDWQE_FIELD_LOC(317, 315)
+#define UDWQE_UD_VLAN_EN UDWQE_FIELD_LOC(318, 318)
+#define UDWQE_LBI UDWQE_FIELD_LOC(319, 319)
+#define UDWQE_DMAC_L UDWQE_FIELD_LOC(351, 320)
+#define UDWQE_DMAC_H UDWQE_FIELD_LOC(367, 352)
+#define UDWQE_GMV_IDX UDWQE_FIELD_LOC(383, 368)
+#define UDWQE_DGID0 UDWQE_FIELD_LOC(415, 384)
+#define UDWQE_DGID1 UDWQE_FIELD_LOC(447, 416)
+#define UDWQE_DGID2 UDWQE_FIELD_LOC(479, 448)
+#define UDWQE_DGID3 UDWQE_FIELD_LOC(511, 480)
+
+#define UDWQE_INLINE_DATA_15_0 UDWQE_FIELD_LOC(63, 48)
+#define UDWQE_INLINE_DATA_23_16 UDWQE_FIELD_LOC(127, 120)
+#define UDWQE_INLINE_DATA_47_24 UDWQE_FIELD_LOC(151, 128)
+#define UDWQE_INLINE_DATA_63_48 UDWQE_FIELD_LOC(175, 160)
#define MAX_SERVICE_LEVEL 0x7
--
2.27.0


@@ -1,186 +0,0 @@
From 49263de90f77f218710ef45bc0377d3e2019d811 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Sat, 25 Dec 2021 17:42:54 +0800
Subject: libhns: Use new interfaces hr_reg_xxx() to operate the DB field
Use hr_reg_xxx() to simplify the code for filling fields.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 59 ++++++++++++++------------------
providers/hns/hns_roce_u_hw_v2.h | 30 ++++++----------
2 files changed, 35 insertions(+), 54 deletions(-)
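For illustration, here is a minimal standalone sketch of what the converted helpers compute: it builds the two little-endian words of an SQ doorbell with plain bit operations, following the DB_TAG/DB_CMD/DB_PI/DB_SL layout this patch introduces. The command value used below is an assumed placeholder, not the driver's real HNS_ROCE_V2_SQ_DB constant, and endianness conversion is omitted.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Layout per the DB_FIELD_LOC definitions in this patch:
         * TAG is bits 23:0 and CMD bits 27:24 of byte_4; PI is bits
         * 15:0 and SL bits 18:16 of parameter (bits 47:32 and 50:48
         * of the whole 64-bit doorbell). */
        uint32_t qpn = 0x18, sq_head = 300, sl = 2;
        uint32_t cmd = 1;       /* assumed value for illustration */
        uint32_t byte_4 = (qpn & 0xffffff) | ((cmd & 0xf) << 24);
        uint32_t parameter = (sq_head & 0xffff) | ((sl & 0x7) << 16);

        printf("byte_4=0x%08x parameter=0x%08x\n", byte_4, parameter);
        return 0;
}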
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 0cff12b..e7dec0b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -291,10 +291,9 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
{
struct hns_roce_db rq_db = {};
- rq_db.byte_4 = htole32(qpn);
- roce_set_field(rq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
- HNS_ROCE_V2_RQ_DB);
- rq_db.parameter = htole32(rq_head);
+ hr_reg_write(&rq_db, DB_TAG, qpn);
+ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
+ hr_reg_write(&rq_db, DB_PI, rq_head);
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
}
@@ -304,12 +303,11 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
{
struct hns_roce_db sq_db = {};
- sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num);
- roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
- HNS_ROCE_V2_SQ_DB);
+ hr_reg_write(&sq_db, DB_TAG, qp->verbs_qp.qp.qp_num);
+ hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB);
+ hr_reg_write(&sq_db, DB_PI, qp->sq.head);
+ hr_reg_write(&sq_db, DB_SL, qp->sl);
- sq_db.parameter = htole32(qp->sq.head);
- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
}
@@ -336,14 +334,10 @@ static void update_cq_db(struct hns_roce_context *ctx,
{
struct hns_roce_db cq_db = {};
- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn);
- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
- HNS_ROCE_V2_CQ_DB_PTR);
-
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M,
- DB_PARAM_CQ_CONSUMER_IDX_S, cq->cons_index);
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M,
- DB_PARAM_CQ_CMD_SN_S, 1);
+ hr_reg_write(&cq_db, DB_TAG, cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_PTR);
+ hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
}
@@ -663,7 +657,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
if (npolled || err == V2_CQ_POLL_ERR) {
if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
- *cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M;
+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
else
update_cq_db(ctx, cq);
}
@@ -679,24 +673,17 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
struct hns_roce_db cq_db = {};
uint32_t solicited_flag;
- uint32_t cmd_sn;
uint32_t ci;
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
- cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK;
solicited_flag = solicited ? HNS_ROCE_V2_CQ_DB_REQ_SOL :
HNS_ROCE_V2_CQ_DB_REQ_NEXT;
- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn);
- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
- HNS_ROCE_V2_CQ_DB_NTR);
-
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M,
- DB_PARAM_CQ_CONSUMER_IDX_S, ci);
-
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M,
- DB_PARAM_CQ_CMD_SN_S, cmd_sn);
- roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag);
+ hr_reg_write(&cq_db, DB_TAG, cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_NTR);
+ hr_reg_write(&cq_db, DB_CQ_CI, ci);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn);
+ hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag);
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
@@ -1626,6 +1613,13 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
idx_que->head++;
}
+static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq)
+{
+ hr_reg_write(db, DB_TAG, srq->srqn);
+ hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
+ hr_reg_write(db, DB_PI, srq->idx_que.head);
+}
+
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
struct ibv_recv_wr *wr,
struct ibv_recv_wr **bad_wr)
@@ -1665,10 +1659,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
*/
udma_to_device_barrier();
- srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S |
- srq->srqn);
- srq_db.parameter = htole32(srq->idx_que.head &
- DB_PARAM_SRQ_PRODUCER_COUNTER_M);
+ update_srq_db(&srq_db, srq);
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
(__le32 *)&srq_db);
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 4330b7d..e91b1f7 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -41,8 +41,6 @@ enum {
#define HNS_ROCE_V2_CQ_DB_REQ_SOL 1
#define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0
-#define HNS_ROCE_CMDSN_MASK 0x3
-
#define HNS_ROCE_SL_SHIFT 2
/* V2 REG DEFINITION */
@@ -127,27 +125,19 @@ struct hns_roce_db {
__le32 byte_4;
__le32 parameter;
};
-#define DB_BYTE_4_TAG_S 0
-#define DB_BYTE_4_TAG_M GENMASK(23, 0)
-
-#define DB_BYTE_4_CMD_S 24
-#define DB_BYTE_4_CMD_M GENMASK(27, 24)
-
-#define DB_BYTE_4_FLAG_S 31
-
-#define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0
-#define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0)
-
-#define DB_PARAM_SL_S 16
-#define DB_PARAM_SL_M GENMASK(18, 16)
-#define DB_PARAM_CQ_CONSUMER_IDX_S 0
-#define DB_PARAM_CQ_CONSUMER_IDX_M GENMASK(23, 0)
+#define DB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_db, h, l)
-#define DB_PARAM_CQ_NOTIFY_S 24
+#define DB_TAG DB_FIELD_LOC(23, 0)
+#define DB_CMD DB_FIELD_LOC(27, 24)
+#define DB_FLAG DB_FIELD_LOC(31, 31)
+#define DB_PI DB_FIELD_LOC(47, 32)
+#define DB_SL DB_FIELD_LOC(50, 48)
+#define DB_CQ_CI DB_FIELD_LOC(55, 32)
+#define DB_CQ_NOTIFY DB_FIELD_LOC(56, 56)
+#define DB_CQ_CMD_SN DB_FIELD_LOC(58, 57)
-#define DB_PARAM_CQ_CMD_SN_S 25
-#define DB_PARAM_CQ_CMD_SN_M GENMASK(26, 25)
+#define RECORD_DB_CI_MASK GENMASK(23, 0)
struct hns_roce_v2_cqe {
__le32 byte_4;
--
2.27.0


@@ -1,306 +0,0 @@
From 2da2a94f0ef5b6cf7fb8eacee1814a418d9bde74 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Sat, 25 Dec 2021 17:42:53 +0800
Subject: libhns: Add new interfaces hr_reg_xxx() to operate the CQE field
Implement hr_reg_xxx() to simplify the code for filling or extracting
fields.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.h | 53 +++++++++++++++++++++++++
providers/hns/hns_roce_u_hw_v2.c | 58 ++++++++++------------------
providers/hns/hns_roce_u_hw_v2.h | 66 ++++++++++++--------------------
3 files changed, 98 insertions(+), 79 deletions(-)
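As a rough illustration of the field-location idea, the self-contained sketch below uses simplified, hypothetical field_write()/field_read() helpers. The real hr_reg_*() macros added by this patch also perform little-endian conversion (htole32/le32toh) and compile-time checks (BUILD_ASSERT), which are omitted here.

#include <stdint.h>
#include <stdio.h>

/* A field is named by its absolute high/low bit positions inside a
 * structure viewed as an array of 32-bit words; both bits must fall
 * in the same word, as the FIELD_LOC macro asserts. */
static void field_write(uint32_t *base, unsigned int h, unsigned int l,
                        uint32_t val)
{
        unsigned int width = h % 32 - l % 32 + 1;
        uint32_t mask = (uint32_t)((((uint64_t)1 << width) - 1) << (l % 32));

        base[h / 32] = (base[h / 32] & ~mask) | ((val << (l % 32)) & mask);
}

static uint32_t field_read(const uint32_t *base, unsigned int h,
                           unsigned int l)
{
        unsigned int width = h % 32 - l % 32 + 1;
        uint32_t mask = (uint32_t)(((uint64_t)1 << width) - 1);

        return (base[h / 32] >> (l % 32)) & mask;
}

int main(void)
{
        uint32_t cqe[16] = {};  /* a 64-byte CQE as sixteen dwords */

        /* CQE_STATUS is bits 15..8 and CQE_WQE_IDX bits 31..16, per
         * the CQE_FIELD_LOC definitions in this patch. */
        field_write(cqe, 15, 8, 0x2a);
        field_write(cqe, 31, 16, 7);
        printf("status=0x%x wqe_idx=%u\n",
               field_read(cqe, 15, 8), field_read(cqe, 31, 16));
        return 0;
}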
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index c1ae1c9..df7f485 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -101,6 +101,59 @@
#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
+#define FIELD_LOC(field_type, field_h, field_l) \
+ field_type, field_h, \
+ field_l + BUILD_ASSERT_OR_ZERO(((field_h) / 32) == \
+ ((field_l) / 32))
+
+#define _hr_reg_enable(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_ASSERT((field_h) == (field_l)); \
+ *((__le32 *)_ptr + (field_h) / 32) |= \
+ htole32(BIT((field_l) % 32)); \
+ })
+
+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
+
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_ASSERT((field_h) >= (field_l)); \
+ *((__le32 *)_ptr + (field_h) / 32) &= \
+ ~htole32(GENMASK((field_h) % 32, (field_l) % 32)); \
+ })
+
+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field)
+
+#define _hr_reg_write_bool(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ (val) ? _hr_reg_enable(ptr, field_type, field_h, field_l) : \
+ _hr_reg_clear(ptr, field_type, field_h, field_l);\
+ })
+
+#define hr_reg_write_bool(ptr, field, val) _hr_reg_write_bool(ptr, field, val)
+
+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ const uint32_t _val = val; \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ *((__le32 *)ptr + (field_h) / 32) |= htole32(FIELD_PREP( \
+ GENMASK((field_h) % 32, (field_l) % 32), _val)); \
+ })
+
+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val)
+
+#define _hr_reg_read(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_ASSERT((field_h) >= (field_l)); \
+ FIELD_GET(GENMASK((field_h) % 32, (field_l) % 32), \
+ le32toh(*((__le32 *)_ptr + (field_h) / 32))); \
+ })
+
+#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
+
enum {
HNS_ROCE_QP_TABLE_BITS = 8,
HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS,
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index e7dec0b..558457a 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -187,8 +187,7 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
}
}
- wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M,
- CQE_BYTE_16_SUB_STATUS_S);
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
}
static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
@@ -200,8 +199,8 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n)
{
struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe);
- return (!!(roce_get_bit(cqe->byte_4, CQE_BYTE_4_OWNER_S)) ^
- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL;
+ return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ?
+ cqe : NULL;
}
static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
@@ -257,8 +256,7 @@ static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
uint32_t srqn;
if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) {
- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M,
- CQE_BYTE_12_XRC_SRQN_S);
+ srqn = hr_reg_read(cqe, CQE_XRC_SRQN);
*srq = hns_roce_find_srq(ctx, srqn);
if (!*srq)
@@ -438,15 +436,13 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
(opcode == HNS_ROCE_RECV_OP_SEND ||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM ||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) &&
- (roce_get_bit(cqe->byte_4, CQE_BYTE_4_RQ_INLINE_S))) {
+ hr_reg_read(cqe, CQE_RQ_INLINE)) {
struct hns_roce_rinl_sge *sge_list;
uint32_t wr_num, wr_cnt, sge_num, data_len;
uint8_t *wqe_buf;
uint32_t sge_cnt, size;
- wr_num = (uint16_t)roce_get_field(cqe->byte_4,
- CQE_BYTE_4_WQE_IDX_M,
- CQE_BYTE_4_WQE_IDX_S) & 0xffff;
+ wr_num = hr_reg_read(cqe, CQE_WQE_IDX);
wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
sge_list = (*cur_qp)->rq_rinl_buf.wqe_list[wr_cnt].sg_list;
@@ -477,13 +473,10 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc)
{
- wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M,
- CQE_BYTE_32_SL_S);
- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M,
- CQE_BYTE_32_RMT_QPN_S);
+ wc->sl = hr_reg_read(cqe, CQE_SL);
+ wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN);
wc->slid = 0;
- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ?
- IBV_WC_GRH : 0;
+ wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IBV_WC_GRH : 0;
wc->pkey_index = 0;
}
@@ -492,8 +485,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
{
uint32_t wqe_idx;
- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
- CQE_BYTE_4_WQE_IDX_S);
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
hns_roce_free_srq_wqe(srq, wqe_idx);
}
@@ -533,8 +525,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
* according to the wqe idx in the current cqe first
*/
if (hr_qp->sq_signal_bits) {
- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
- CQE_BYTE_4_WQE_IDX_S);
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
/* get the processed wqes num since last signalling */
wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
}
@@ -590,8 +581,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
udma_from_device_barrier();
- qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M,
- CQE_BYTE_16_LCL_QPN_S);
+ qpn = hr_reg_read(cqe, CQE_LCL_QPN);
/* if cur qp is null, then could not get the correct qpn */
if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) {
@@ -600,11 +590,9 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
return V2_CQ_POLL_ERR;
}
- status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
- CQE_BYTE_4_STATUS_S);
- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
- CQE_BYTE_4_OPCODE_S);
- is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ;
+ status = hr_reg_read(cqe, CQE_STATUS);
+ opcode = hr_reg_read(cqe, CQE_OPCODE);
+ is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ;
if (is_send) {
parse_cqe_for_req(cqe, wc, *cur_qp, opcode);
} else {
@@ -1350,26 +1338,20 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
while ((int) --prod_index - (int) cq->cons_index >= 0) {
cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe);
- if (roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M,
- CQE_BYTE_16_LCL_QPN_S) == qpn) {
- is_recv_cqe = roce_get_bit(cqe->byte_4,
- CQE_BYTE_4_S_R_S);
+ if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
+ is_recv_cqe = hr_reg_read(cqe, CQE_S_R);
if (srq && is_recv_cqe) {
- wqe_index = roce_get_field(cqe->byte_4,
- CQE_BYTE_4_WQE_IDX_M,
- CQE_BYTE_4_WQE_IDX_S);
+ wqe_index = hr_reg_read(cqe, CQE_WQE_IDX);
hns_roce_free_srq_wqe(srq, wqe_index);
}
++nfreed;
} else if (nfreed) {
dest = get_cqe_v2(cq,
(prod_index + nfreed) & cq->ibv_cq.cqe);
- owner_bit = roce_get_bit(dest->byte_4,
- CQE_BYTE_4_OWNER_S);
+ owner_bit = hr_reg_read(dest, CQE_OWNER);
memcpy(dest, cqe, cq->cqe_size);
- roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S,
- owner_bit);
+ hr_reg_write_bool(dest, CQE_OWNER, owner_bit);
}
}
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index e91b1f7..92e5f1a 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -154,47 +154,31 @@ struct hns_roce_v2_cqe {
__le32 rsv[8];
};
-#define CQE_BYTE_4_OPCODE_S 0
-#define CQE_BYTE_4_OPCODE_M (((1UL << 5) - 1) << CQE_BYTE_4_OPCODE_S)
-
-#define CQE_BYTE_4_RQ_INLINE_S 5
-
-#define CQE_BYTE_4_S_R_S 6
-#define CQE_BYTE_4_OWNER_S 7
-
-#define CQE_BYTE_4_STATUS_S 8
-#define CQE_BYTE_4_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_4_STATUS_S)
-
-#define CQE_BYTE_4_WQE_IDX_S 16
-#define CQE_BYTE_4_WQE_IDX_M (((1UL << 16) - 1) << CQE_BYTE_4_WQE_IDX_S)
-
-#define CQE_BYTE_12_XRC_SRQN_S 0
-#define CQE_BYTE_12_XRC_SRQN_M (((1UL << 24) - 1) << CQE_BYTE_12_XRC_SRQN_S)
-
-#define CQE_BYTE_16_LCL_QPN_S 0
-#define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S)
-
-#define CQE_BYTE_16_SUB_STATUS_S 24
-#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S)
-
-#define CQE_BYTE_28_SMAC_S 0
-#define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S)
-
-#define CQE_BYTE_28_PORT_TYPE_S 16
-#define CQE_BYTE_28_PORT_TYPE_M (((1UL << 2) - 1) << CQE_BYTE_28_PORT_TYPE_S)
-
-#define CQE_BYTE_32_RMT_QPN_S 0
-#define CQE_BYTE_32_RMT_QPN_M (((1UL << 24) - 1) << CQE_BYTE_32_RMT_QPN_S)
-
-#define CQE_BYTE_32_SL_S 24
-#define CQE_BYTE_32_SL_M (((1UL << 3) - 1) << CQE_BYTE_32_SL_S)
-
-#define CQE_BYTE_32_PORTN_S 27
-#define CQE_BYTE_32_PORTN_M (((1UL << 3) - 1) << CQE_BYTE_32_PORTN_S)
-
-#define CQE_BYTE_32_GRH_S 30
-
-#define CQE_BYTE_32_LPK_S 31
+#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l)
+
+#define CQE_OPCODE CQE_FIELD_LOC(4, 0)
+#define CQE_RQ_INLINE CQE_FIELD_LOC(5, 5)
+#define CQE_S_R CQE_FIELD_LOC(6, 6)
+#define CQE_OWNER CQE_FIELD_LOC(7, 7)
+#define CQE_STATUS CQE_FIELD_LOC(15, 8)
+#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16)
+#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32)
+#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64)
+#define CQE_RSV0 CQE_FIELD_LOC(95, 88)
+#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96)
+#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120)
+#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128)
+#define CQE_SMAC CQE_FIELD_LOC(207, 160)
+#define CQE_PORT_TYPE CQE_FIELD_LOC(209, 208)
+#define CQE_VID CQE_FIELD_LOC(221, 210)
+#define CQE_VID_VLD CQE_FIELD_LOC(222, 222)
+#define CQE_RSV2 CQE_FIELD_LOC(223, 223)
+#define CQE_RMT_QPN CQE_FIELD_LOC(247, 224)
+#define CQE_SL CQE_FIELD_LOC(250, 248)
+#define CQE_PORTN CQE_FIELD_LOC(253, 251)
+#define CQE_GRH CQE_FIELD_LOC(254, 254)
+#define CQE_LPK CQE_FIELD_LOC(255, 255)
+#define CQE_RSV3 CQE_FIELD_LOC(511, 256)
struct hns_roce_rc_sq_wqe {
__le32 byte_4;
--
2.27.0


@@ -1,202 +0,0 @@
From 48e8ca01b1e5d033fca6e988d2d280846c95d7e1 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Fri, 31 Dec 2021 18:01:06 +0800
Subject: libhns: Fix the calculation of QP/SRQ table size
The table_size is the maximum number of QPs/SRQs. This value may not be
a power of two, so the old algorithm can allocate a mismatched table.
Fixes: 887b78c80224 ("libhns: Add initial main frame")
Fixes: 9e3df7578153 ("libhns: Support ibv_create_srq_ex")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
providers/hns/hns_roce_u.c | 18 +++++++++++-------
providers/hns/hns_roce_u.h | 20 ++++++++++++++------
providers/hns/hns_roce_u_hw_v1.c | 4 ++--
providers/hns/hns_roce_u_hw_v2.c | 4 ++--
providers/hns/hns_roce_u_verbs.c | 9 ++++-----
5 files changed, 33 insertions(+), 22 deletions(-)
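A small standalone demo of why the rounding matters, using a hypothetical stand-in for rdma-core's ilog32()-based hr_ilog32(); the entry count 3000 is an invented example.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for hr_ilog32(count) = ilog32(count - 1): the number of
 * bits needed to round 'count' up to a power of two. */
static uint32_t hr_ilog32(uint32_t count)
{
        uint32_t shift = 0;

        while ((1U << shift) < count)
                shift++;
        return shift;
}

static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
{
        uint32_t count_shift = hr_ilog32(entry_count);

        return count_shift > size_shift ? count_shift - size_shift : 0;
}

int main(void)
{
        /* e.g. qp_tab_size = 3000 (not a power of two), 256 top-level
         * slots: the old ffs()-based math, ffs(3000) - 1 - 8, picks
         * the lowest set bit and yields -5, a nonsensical shift;
         * rounding up first gives hr_ilog32(3000) = 12, so 12 - 8 = 4. */
        printf("shift=%u\n", calc_table_shift(3000, 8));
        return 0;
}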
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 9dc4905..6eac4ff 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -92,6 +92,13 @@ static const struct verbs_context_ops hns_common_ops = {
.get_srq_num = hns_roce_u_get_srq_num,
};
+static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
+{
+ uint32_t count_shift = hr_ilog32(entry_count);
+
+ return count_shift > size_shift ? count_shift - size_shift : 0;
+}
+
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
@@ -120,18 +127,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
else
context->cqe_size = HNS_ROCE_V3_CQE_SIZE;
- context->num_qps = resp.qp_tab_size;
- context->num_srqs = resp.srq_tab_size;
-
- context->qp_table_shift = ffs(context->num_qps) - 1 -
- HNS_ROCE_QP_TABLE_BITS;
+ context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
+ HNS_ROCE_QP_TABLE_BITS);
context->qp_table_mask = (1 << context->qp_table_shift) - 1;
pthread_mutex_init(&context->qp_table_mutex, NULL);
for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i)
context->qp_table[i].refcnt = 0;
- context->srq_table_shift = ffs(context->num_srqs) - 1 -
- HNS_ROCE_SRQ_TABLE_BITS;
+ context->srq_table_shift = calc_table_shift(resp.srq_tab_size,
+ HNS_ROCE_SRQ_TABLE_BITS);
context->srq_table_mask = (1 << context->srq_table_shift) - 1;
pthread_mutex_init(&context->srq_table_mutex, NULL);
for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index df7f485..9366923 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -154,10 +154,8 @@
#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
-enum {
- HNS_ROCE_QP_TABLE_BITS = 8,
- HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS,
-};
+#define HNS_ROCE_QP_TABLE_BITS 8
+#define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
#define HNS_ROCE_SRQ_TABLE_BITS 8
#define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS)
@@ -211,7 +209,6 @@ struct hns_roce_context {
int refcnt;
} qp_table[HNS_ROCE_QP_TABLE_SIZE];
pthread_mutex_t qp_table_mutex;
- uint32_t num_qps;
uint32_t qp_table_shift;
uint32_t qp_table_mask;
@@ -220,7 +217,6 @@ struct hns_roce_context {
int refcnt;
} srq_table[HNS_ROCE_SRQ_TABLE_SIZE];
pthread_mutex_t srq_table_mutex;
- uint32_t num_srqs;
uint32_t srq_table_shift;
uint32_t srq_table_mask;
@@ -382,6 +378,18 @@ static inline unsigned int hr_ilog32(unsigned int count)
return ilog32(count - 1);
}
+static inline uint32_t to_hr_qp_table_index(uint32_t qpn,
+ struct hns_roce_context *ctx)
+{
+ return (qpn >> ctx->qp_table_shift) & (HNS_ROCE_QP_TABLE_SIZE - 1);
+}
+
+static inline uint32_t to_hr_srq_table_index(uint32_t srqn,
+ struct hns_roce_context *ctx)
+{
+ return (srqn >> ctx->srq_table_shift) & (HNS_ROCE_SRQ_TABLE_SIZE - 1);
+}
+
static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev)
{
return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device);
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
index 838e004..28ad482 100644
--- a/providers/hns/hns_roce_u_hw_v1.c
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -220,7 +220,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
uint32_t qpn)
{
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
if (ctx->qp_table[tind].refcnt) {
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
@@ -232,7 +232,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
{
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
if (!--ctx->qp_table[tind].refcnt)
free(ctx->qp_table[tind].table);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 558457a..e39ee7f 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -343,7 +343,7 @@ static void update_cq_db(struct hns_roce_context *ctx,
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
uint32_t qpn)
{
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
if (ctx->qp_table[tind].refcnt)
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
@@ -354,7 +354,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
{
uint32_t qpn = qp->verbs_qp.qp.qp_num;
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
pthread_mutex_lock(&ctx->qp_table_mutex);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 557d075..5ccb701 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -431,8 +431,7 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq)
static int hns_roce_store_srq(struct hns_roce_context *ctx,
struct hns_roce_srq *srq)
{
- uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >>
- ctx->srq_table_shift;
+ uint32_t tind = to_hr_srq_table_index(srq->srqn, ctx);
pthread_mutex_lock(&ctx->srq_table_mutex);
@@ -457,7 +456,7 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx,
struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx,
uint32_t srqn)
{
- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
+ uint32_t tind = to_hr_srq_table_index(srqn, ctx);
if (ctx->srq_table[tind].refcnt)
return ctx->srq_table[tind].table[srqn & ctx->srq_table_mask];
@@ -467,7 +466,7 @@ struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx,
static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn)
{
- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
+ uint32_t tind = to_hr_srq_table_index(srqn, ctx);
pthread_mutex_lock(&ctx->srq_table_mutex);
@@ -1108,7 +1107,7 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
struct hns_roce_qp *qp)
{
uint32_t qpn = qp->verbs_qp.qp.qp_num;
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
pthread_mutex_lock(&ctx->qp_table_mutex);
if (!ctx->qp_table[tind].refcnt) {
--
2.27.0


@@ -1,31 +0,0 @@
From d4766cd11b985f7f798410129a0b204beb13ecef Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Mon, 17 Jan 2022 20:43:39 +0800
Subject: libhns: Fix wrong HIP08 version macro
The version macro of HIP08 should be consistent with the version number
queried from the hardware.
Fixes: b8cb140e9cd6 ("libhns: Refresh version info before using it")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 9366923..2b4ba18 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -48,8 +48,7 @@
#include "hns_roce_u_abi.h"
#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
-
-#define HNS_ROCE_HW_VER2 ('h' << 24 | 'i' << 16 | '0' << 8 | '8')
+#define HNS_ROCE_HW_VER2 0x100
#define HNS_ROCE_HW_VER3 0x130
#define PFX "hns: "
--
2.27.0


@@ -1,115 +0,0 @@
From 203675526b14d9556eeb4212536ebcfc81691c1b Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Mon, 17 Jan 2022 20:43:38 +0800
Subject: libhns: Fix out-of-bounds write when filling inline data into
extended sge space
If the buffer that stores the inline data is in the last page of the
extended sge space, filling the entire inline data into that space in
one step may result in an out-of-bounds write.
When the remaining space at the end of the extended sge is not enough to
accommodate the entire inline data, the inline data needs to be filled
into the extended sge space in two steps:
(1) The front part of the inline data is filled into the remaining space
at the end of the extended sge.
(2) The remaining inline data is filled into the header space of the
extended sge.
Fixes: b7814b7b9715 ("libhns: Support inline data in extented sge space for RC")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 40 ++++++++++++++++++++++----------
1 file changed, 28 insertions(+), 12 deletions(-)
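The two-step copy described above is the classic ring-buffer wrap-around pattern. Below is a minimal generic sketch of it; the buffer size, offset, and data are invented for illustration and do not match the driver's sge geometry.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy 'len' bytes into a ring of 'size' bytes starting at 'offset',
 * splitting the copy when it would run past the end -- the same
 * two-step scheme the patch applies to the extended sge space. */
static void ring_copy(uint8_t *ring, size_t size, size_t offset,
                      const void *src, size_t len)
{
        size_t tail = size - offset;    /* room before the wrap point */

        if (len <= tail) {
                memcpy(ring + offset, src, len);
        } else {
                memcpy(ring + offset, src, tail);           /* step 1 */
                memcpy(ring, (const uint8_t *)src + tail,
                       len - tail);                         /* step 2 */
        }
}

int main(void)
{
        uint8_t ring[8] = {};
        const char data[] = "ABCDEF";
        size_t i;

        ring_copy(ring, sizeof(ring), 5, data, 6); /* wraps after 3 bytes */
        for (i = 0; i < sizeof(ring); i++)
                putchar(ring[i] ? ring[i] : '.');
        putchar('\n');  /* prints: DEF..ABC */
        return 0;
}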
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index e39ee7f..20745dc 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -772,21 +772,43 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
struct hns_roce_sge_info *sge_info)
{
unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg);
- void *dseg;
+ unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
+ void *dst_addr, *src_addr, *tail_bound_addr;
+ uint32_t src_len, tail_len;
int i;
+
if (sge_info->total_len > qp->sq.max_gs * sge_sz)
return EINVAL;
- dseg = get_send_sge_ex(qp, sge_info->start_idx);
+ dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
+ tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask);
for (i = 0; i < wr->num_sge; i++) {
- memcpy(dseg, (void *)(uintptr_t)wr->sg_list[i].addr,
- wr->sg_list[i].length);
- dseg += wr->sg_list[i].length;
+ tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr;
+
+ src_addr = (void *)(uintptr_t)wr->sg_list[i].addr;
+ src_len = wr->sg_list[i].length;
+
+ if (src_len < tail_len) {
+ memcpy(dst_addr, src_addr, src_len);
+ dst_addr += src_len;
+ } else if (src_len == tail_len) {
+ memcpy(dst_addr, src_addr, src_len);
+ dst_addr = get_send_sge_ex(qp, 0);
+ } else {
+ memcpy(dst_addr, src_addr, tail_len);
+ dst_addr = get_send_sge_ex(qp, 0);
+ src_addr += tail_len;
+ src_len -= tail_len;
+
+ memcpy(dst_addr, src_addr, src_len);
+ dst_addr += src_len;
+ }
}
- sge_info->start_idx += DIV_ROUND_UP(sge_info->total_len, sge_sz);
+ sge_info->valid_num = DIV_ROUND_UP(sge_info->total_len, sge_sz);
+ sge_info->start_idx += sge_info->valid_num;
return 0;
}
@@ -828,7 +850,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
struct hns_roce_ud_sq_wqe *ud_sq_wqe,
struct hns_roce_sge_info *sge_info)
{
- unsigned int sge_idx = sge_info->start_idx;
int ret;
if (!check_inl_data_len(qp, sge_info->total_len))
@@ -845,8 +866,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
if (ret)
return ret;
- sge_info->valid_num = sge_info->start_idx - sge_idx;
-
hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num);
}
@@ -969,7 +988,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
struct hns_roce_rc_sq_wqe *rc_sq_wqe,
struct hns_roce_sge_info *sge_info)
{
- unsigned int sge_idx = sge_info->start_idx;
void *dseg = rc_sq_wqe;
int ret;
int i;
@@ -997,8 +1015,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
if (ret)
return ret;
- sge_info->valid_num = sge_info->start_idx - sge_idx;
-
hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num);
}
--
2.27.0


@@ -1,171 +0,0 @@
From 85a5aa79327f45e4bea8d7ad0e55842225ca676a Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 18 Jan 2022 19:58:51 +0800
Subject: libhns: Clear remaining unused sges when post recv
HIP09 requires the driver to clear the unused data segments in the WQE
buffer so that the hns ROCEE stops reading the remaining invalid sges
for the RQ.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 88 ++++++++++++++------------------
1 file changed, 39 insertions(+), 49 deletions(-)
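A condensed sketch of the two termination strategies the new fill_recv_sge_to_wqe() chooses between. The struct layout mirrors hns_roce_v2_wqe_data_seg with host types (the real code uses __le32/__le64 and htole32), and the INVALID_SGE_LENGTH value is an assumption here; check the real header.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct data_seg {
        uint32_t len;
        uint32_t lkey;
        uint64_t addr;
};

#define INVALID_SGE_LENGTH 0x80000000   /* assumed placeholder value */

static void terminate_sges(struct data_seg *dseg, unsigned int cnt,
                           unsigned int max_sge, int rsv)
{
        if (rsv) {
                /* A reserved sge exists: write one sentinel entry and
                 * the hardware stops reading there. */
                dseg[cnt].lkey = 0;
                dseg[cnt].addr = 0;
                dseg[cnt].len = INVALID_SGE_LENGTH;
        } else if (cnt < max_sge) {
                /* No reserved sge (the HIP09 RQ case): zero every
                 * unused entry so the ROCEE ignores them. */
                memset(dseg + cnt, 0, (max_sge - cnt) * sizeof(*dseg));
        }
}

int main(void)
{
        struct data_seg wqe[4] = {};

        wqe[0].len = 64;                /* one valid sge already set */
        terminate_sges(wqe, 1, 4, 0);   /* clear the remaining three */
        printf("seg1 len=%u\n", (unsigned)wqe[1].len);
        return 0;
}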
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 20745dc..6b0d7f1 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -85,14 +85,6 @@ static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = htole32(sg->length);
}
-/* Fill an ending sge to make hw stop reading the remaining sges in wqe */
-static inline void set_ending_data_seg(struct hns_roce_v2_wqe_data_seg *dseg)
-{
- dseg->lkey = htole32(0x0);
- dseg->addr = 0;
- dseg->len = htole32(INVALID_SGE_LENGTH);
-}
-
static void set_extend_atomic_seg(struct hns_roce_qp *qp, unsigned int sge_cnt,
struct hns_roce_sge_info *sge_info, void *buf)
{
@@ -1247,23 +1239,43 @@ static int check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx)
return 0;
}
-static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr,
- unsigned int wqe_idx)
+static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
+ unsigned int max_sge, bool rsv)
{
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct hns_roce_rinl_sge *sge_list;
- int i;
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ unsigned int i, cnt;
- dseg = get_recv_wqe_v2(qp, wqe_idx);
- for (i = 0; i < wr->num_sge; i++) {
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
+ /* Skip zero-length sge */
if (!wr->sg_list[i].length)
continue;
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
+
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
+ cnt++;
}
- if (qp->rq.rsv_sge)
- set_ending_data_seg(dseg);
+ /* Fill a reserved sge to make ROCEE stop reading remaining segments */
+ if (rsv) {
+ dseg[cnt].lkey = 0;
+ dseg[cnt].addr = 0;
+ dseg[cnt].len = htole32(INVALID_SGE_LENGTH);
+ } else {
+ /* Clear remaining segments to make ROCEE ignore sges */
+ if (cnt < max_sge)
+ memset(dseg + cnt, 0,
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
+ }
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr,
+ unsigned int wqe_idx, unsigned int max_sge)
+{
+ struct hns_roce_rinl_sge *sge_list;
+ unsigned int i;
+ void *wqe;
+
+ wqe = get_recv_wqe_v2(qp, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, qp->rq.rsv_sge);
if (!qp->rq_rinl_buf.wqe_cnt)
return;
@@ -1310,7 +1322,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
}
wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1);
- fill_rq_wqe(qp, wr, wqe_idx);
+ fill_rq_wqe(qp, wr, wqe_idx, max_sge);
qp->rq.wrid[wqe_idx] = wr->wr_id;
}
@@ -1536,10 +1548,8 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
}
static int check_post_srq_valid(struct hns_roce_srq *srq,
- struct ibv_recv_wr *wr)
+ struct ibv_recv_wr *wr, unsigned int max_sge)
{
- unsigned int max_sge = srq->max_gs - srq->rsv_sge;
-
if (hns_roce_v2_srqwq_overflow(srq))
return -ENOMEM;
@@ -1575,28 +1585,6 @@ static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx)
return 0;
}
-static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx,
- struct ibv_recv_wr *wr)
-{
- struct hns_roce_v2_wqe_data_seg *dseg;
- int i;
-
- dseg = get_srq_wqe(srq, wqe_idx);
-
- for (i = 0; i < wr->num_sge; ++i) {
- dseg[i].len = htole32(wr->sg_list[i].length);
- dseg[i].lkey = htole32(wr->sg_list[i].lkey);
- dseg[i].addr = htole64(wr->sg_list[i].addr);
- }
-
- /* hw stop reading when identify the last one */
- if (srq->rsv_sge) {
- dseg[i].len = htole32(INVALID_SGE_LENGTH);
- dseg[i].lkey = htole32(0x0);
- dseg[i].addr = 0;
- }
-}
-
static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
@@ -1624,15 +1612,16 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
{
struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context);
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
+ unsigned int wqe_idx, max_sge, nreq;
struct hns_roce_db srq_db;
- unsigned int wqe_idx;
int ret = 0;
- int nreq;
+ void *wqe;
pthread_spin_lock(&srq->lock);
+ max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- ret = check_post_srq_valid(srq, wr);
+ ret = check_post_srq_valid(srq, wr, max_sge);
if (ret) {
*bad_wr = wr;
break;
@@ -1644,7 +1633,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
break;
}
- fill_srq_wqe(srq, wqe_idx, wr);
+ wqe = get_srq_wqe(srq, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
--
2.27.0


@@ -1,346 +0,0 @@
From d8596eff4eb46d1db1b6066e3bbbd03976f49e58 Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Mon, 7 Mar 2022 18:49:35 +0800
Subject: libhns: Add support for creating extended CQ
The driver supports ibv_create_cq_ex() to create an extended CQ, but it
does not yet support the extended attributes specified by
attr->comp_mask and attr->wc_flags.
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.c | 1 +
providers/hns/hns_roce_u.h | 6 ++-
providers/hns/hns_roce_u_abi.h | 5 +++
providers/hns/hns_roce_u_hw_v1.c | 20 +++++-----
providers/hns/hns_roce_u_hw_v2.c | 16 ++++----
providers/hns/hns_roce_u_verbs.c | 63 ++++++++++++++++++++++----------
6 files changed, 72 insertions(+), 39 deletions(-)
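From the application side, usage would look roughly like the sketch below, using standard libibverbs calls; device selection and error handling are abbreviated, and comp_mask/wc_flags are left at zero as the message above requires.

#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
        struct ibv_device **list = ibv_get_device_list(NULL);
        struct ibv_context *ctx =
                (list && list[0]) ? ibv_open_device(list[0]) : NULL;
        struct ibv_cq_init_attr_ex attr = {
                .cqe = 64,
                .channel = NULL,
                .comp_vector = 0,
                /* .comp_mask / .wc_flags stay 0: not yet supported */
        };
        struct ibv_cq_ex *cq;

        if (!ctx) {
                if (list)
                        ibv_free_device_list(list);
                return 1;
        }

        cq = ibv_create_cq_ex(ctx, &attr);
        if (!cq) {
                perror("ibv_create_cq_ex");
                return 1;
        }

        ibv_destroy_cq(ibv_cq_ex_to_cq(cq));
        ibv_close_device(ctx);
        ibv_free_device_list(list);
        return 0;
}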
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 6eac4ff0..f8a647b8 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -66,6 +66,7 @@ static const struct verbs_context_ops hns_common_ops = {
.bind_mw = hns_roce_u_bind_mw,
.cq_event = hns_roce_u_cq_event,
.create_cq = hns_roce_u_create_cq,
+ .create_cq_ex = hns_roce_u_create_cq_ex,
.create_qp = hns_roce_u_create_qp,
.create_qp_ex = hns_roce_u_create_qp_ex,
.dealloc_mw = hns_roce_u_dealloc_mw,
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 2b4ba181..505e7498 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -236,7 +236,7 @@ struct hns_roce_pd {
};
struct hns_roce_cq {
- struct ibv_cq ibv_cq;
+ struct verbs_cq verbs_cq;
struct hns_roce_buf buf;
pthread_spinlock_t lock;
unsigned int cqn;
@@ -406,7 +406,7 @@ static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
{
- return container_of(ibv_cq, struct hns_roce_cq, ibv_cq);
+ return container_of(ibv_cq, struct hns_roce_cq, verbs_cq.cq);
}
static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq)
@@ -447,6 +447,8 @@ int hns_roce_u_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
struct ibv_comp_channel *channel,
int comp_vector);
+struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *cq_attr);
int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr);
int hns_roce_u_destroy_cq(struct ibv_cq *cq);
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
index e56f9d35..333f977e 100644
--- a/providers/hns/hns_roce_u_abi.h
+++ b/providers/hns/hns_roce_u_abi.h
@@ -39,8 +39,13 @@
DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
empty, hns_roce_ib_alloc_pd_resp);
+
DECLARE_DRV_CMD(hns_roce_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp);
+
+DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp);
+
DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
empty, hns_roce_ib_alloc_ucontext_resp);
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
index 28ad482c..d47cba0c 100644
--- a/providers/hns/hns_roce_u_hw_v1.c
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -161,10 +161,10 @@ static struct hns_roce_cqe *get_cqe(struct hns_roce_cq *cq, int entry)
static void *get_sw_cqe(struct hns_roce_cq *cq, int n)
{
- struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
+ struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->verbs_cq.cq.cqe);
return (!!(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S)) ^
- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL;
+ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL;
}
static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq)
@@ -210,7 +210,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
cur = wq->head - wq->tail;
pthread_spin_unlock(&cq->lock);
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context),
"wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n",
wq->head, wq->tail, wq->max_post, nreq);
@@ -274,10 +274,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
if (!*cur_qp ||
(local_qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->verbs_qp.qp.qp_num) {
- *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context),
+ *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->verbs_cq.cq.context),
qpn & 0xffffff);
if (!*cur_qp) {
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context),
PFX "can't find qp!\n");
return CQ_POLL_ERR;
}
@@ -317,7 +317,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
if (roce_get_field(cqe->cqe_byte_4,
CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) {
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context),
PFX "error cqe!\n");
hns_roce_handle_error_cqe(cqe, wc);
return CQ_OK;
@@ -599,21 +599,21 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
uint32_t prod_index;
uint8_t owner_bit = 0;
struct hns_roce_cqe *cqe, *dest;
- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index);
++prod_index)
- if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
+ if (prod_index == cq->cons_index + cq->verbs_cq.cq.cqe)
break;
while ((int) --prod_index - (int) cq->cons_index >= 0) {
- cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
+ cqe = get_cqe(cq, prod_index & cq->verbs_cq.cq.cqe);
if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
CQE_BYTE_16_LOCAL_QPN_S) & 0xffffff) == qpn) {
++nfreed;
} else if (nfreed) {
dest = get_cqe(cq,
- (prod_index + nfreed) & cq->ibv_cq.cqe);
+ (prod_index + nfreed) & cq->verbs_cq.cq.cqe);
owner_bit = roce_get_bit(dest->cqe_byte_4,
CQE_BYTE_4_OWNER_S);
memcpy(dest, cqe, sizeof(*cqe));
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index bfd98760..07f3596d 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -189,10 +189,10 @@ static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n)
{
- struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe);
+ struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->verbs_cq.cq.cqe);
- return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ?
- cqe : NULL;
+ return (hr_reg_read(cqe, CQE_OWNER) ^
+ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL;
}
static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
@@ -556,7 +556,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
{
- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
struct hns_roce_srq *srq = NULL;
struct hns_roce_v2_cqe *cqe;
uint8_t opcode;
@@ -1356,15 +1356,15 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
uint16_t wqe_index;
uint32_t prod_index;
struct hns_roce_v2_cqe *cqe, *dest;
- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
for (prod_index = cq->cons_index; get_sw_cqe_v2(cq, prod_index);
++prod_index)
- if (prod_index > cq->cons_index + cq->ibv_cq.cqe)
+ if (prod_index > cq->cons_index + cq->verbs_cq.cq.cqe)
break;
while ((int) --prod_index - (int) cq->cons_index >= 0) {
- cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe);
+ cqe = get_cqe_v2(cq, prod_index & cq->verbs_cq.cq.cqe);
if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
is_recv_cqe = hr_reg_read(cqe, CQE_S_R);
@@ -1375,7 +1375,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
++nfreed;
} else if (nfreed) {
dest = get_cqe_v2(cq,
- (prod_index + nfreed) & cq->ibv_cq.cqe);
+ (prod_index + nfreed) & cq->verbs_cq.cq.cqe);
owner_bit = hr_reg_read(dest, CQE_OWNER);
memcpy(dest, cqe, cq->cqe_size);
hr_reg_write_bool(dest, CQE_OWNER, owner_bit);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 3cc9e0c2..a993c39a 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -276,12 +276,17 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
return 0;
}
-static int hns_roce_verify_cq(int *cqe, struct hns_roce_context *context)
+static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
+ struct hns_roce_context *context)
{
- if (*cqe < 1 || *cqe > context->max_cqe)
+ if (!attr->cqe || attr->cqe > context->max_cqe)
return -EINVAL;
- *cqe = max((uint64_t)HNS_ROCE_MIN_CQE_NUM, roundup_pow_of_two(*cqe));
+ if (attr->comp_mask || attr->wc_flags)
+ return -EOPNOTSUPP;
+
+ attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
+ roundup_pow_of_two(attr->cqe));
return 0;
}
@@ -297,25 +302,25 @@ static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
}
static int exec_cq_create_cmd(struct ibv_context *context,
- struct hns_roce_cq *cq, int cqe,
- struct ibv_comp_channel *channel, int comp_vector)
+ struct hns_roce_cq *cq,
+ struct ibv_cq_init_attr_ex *attr)
{
+ struct hns_roce_create_cq_ex_resp resp_ex = {};
struct hns_roce_ib_create_cq_resp *resp_drv;
- struct hns_roce_create_cq_resp resp = {};
+ struct hns_roce_create_cq_ex cmd_ex = {};
struct hns_roce_ib_create_cq *cmd_drv;
- struct hns_roce_create_cq cmd = {};
int ret;
- cmd_drv = &cmd.drv_payload;
- resp_drv = &resp.drv_payload;
+ cmd_drv = &cmd_ex.drv_payload;
+ resp_drv = &resp_ex.drv_payload;
cmd_drv->buf_addr = (uintptr_t)cq->buf.buf;
cmd_drv->db_addr = (uintptr_t)cq->db;
cmd_drv->cqe_size = (uintptr_t)cq->cqe_size;
- ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
- &cq->ibv_cq, &cmd.ibv_cmd, sizeof(cmd),
- &resp.ibv_resp, sizeof(resp));
+ ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq,
+ &cmd_ex.ibv_cmd, sizeof(cmd_ex),
+ &resp_ex.ibv_resp, sizeof(resp_ex), 0);
if (ret)
return ret;
@@ -325,16 +330,15 @@ static int exec_cq_create_cmd(struct ibv_context *context,
return 0;
}
-struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
- struct ibv_comp_channel *channel,
- int comp_vector)
+static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *attr)
{
struct hns_roce_device *hr_dev = to_hr_dev(context->device);
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
struct hns_roce_cq *cq;
int ret;
- ret = hns_roce_verify_cq(&cqe, hr_ctx);
+ ret = verify_cq_create_attr(attr, hr_ctx);
if (ret)
goto err;
@@ -348,7 +352,7 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
if (ret)
goto err_lock;
- cq->cq_depth = cqe;
+ cq->cq_depth = attr->cqe;
cq->cqe_size = hr_ctx->cqe_size;
ret = hns_roce_alloc_cq_buf(cq);
@@ -363,13 +367,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
*cq->db = 0;
- ret = exec_cq_create_cmd(context, cq, cqe, channel, comp_vector);
+ ret = exec_cq_create_cmd(context, cq, attr);
if (ret)
goto err_cmd;
cq->arm_sn = 1;
- return &cq->ibv_cq;
+ return &cq->verbs_cq.cq_ex;
err_cmd:
if (hr_dev->hw_version != HNS_ROCE_HW_VER1)
@@ -387,6 +391,27 @@ err:
return NULL;
}
+struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
+ struct ibv_comp_channel *channel,
+ int comp_vector)
+{
+ struct ibv_cq_ex *cq;
+ struct ibv_cq_init_attr_ex attr = {
+ .cqe = cqe,
+ .channel = channel,
+ .comp_vector = comp_vector,
+ };
+
+ cq = create_cq(context, &attr);
+ return cq ? ibv_cq_ex_to_cq(cq) : NULL;
+}
+
+struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *attr)
+{
+ return create_cq(context, attr);
+}
+
void hns_roce_u_cq_event(struct ibv_cq *cq)
{
to_hr_cq(cq)->arm_sn++;
--
2.30.0
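For orientation (this note and sketch are not part of the patch): once create_cq() goes through ibv_cmd_create_cq_ex(), the same driver path is reachable through the standard extended-CQ verb. A minimal sketch using only documented libibverbs calls, with an arbitrary CQ depth and error handling trimmed:

    #include <infiniband/verbs.h>

    static struct ibv_cq *create_cq_via_ex(struct ibv_context *ctx)
    {
        struct ibv_cq_init_attr_ex attr = {
            .cqe = 64,         /* the driver rounds this up to >= HNS_ROCE_MIN_CQE_NUM */
            .comp_vector = 0,
            .wc_flags = 0,     /* non-zero wc_flags are still rejected at this point */
        };
        struct ibv_cq_ex *cq_ex = ibv_create_cq_ex(ctx, &attr);

        /* an extended CQ remains usable through the legacy interface */
        return cq_ex ? ibv_cq_ex_to_cq(cq_ex) : NULL;
    }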


@ -1,415 +0,0 @@
From 0464e0cb0416d679aba3b58261bbd2cadb74fd03 Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Mon, 7 Mar 2022 18:49:36 +0800
Subject: libhns: Extended CQ supports the new polling mechanism
OFED provides new polling APIs for the extended CQ. With the new APIs, users
can poll the extended CQ more efficiently.
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.h | 1 +
providers/hns/hns_roce_u_hw_v2.c | 319 +++++++++++++++++++++++++++++++
providers/hns/hns_roce_u_hw_v2.h | 1 +
providers/hns/hns_roce_u_verbs.c | 18 +-
4 files changed, 337 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 505e7498..70ac6e5b 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -247,6 +247,7 @@ struct hns_roce_cq {
int arm_sn;
unsigned long flags;
unsigned int cqe_size;
+ struct hns_roce_v2_cqe *cqe;
};
struct hns_roce_idx_que {
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 07f3596d..081ab1f3 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1657,6 +1657,325 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
return ret;
}
+static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
+ struct hns_roce_cq *cq)
+{
+ struct hns_roce_wq *wq = &hr_qp->sq;
+
+ if (hr_qp->sq_signal_bits)
+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
+
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+}
+
+static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
+ struct hns_roce_cq *cq)
+{
+ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
+ hns_roce_free_srq_wqe(srq, wqe_idx);
+}
+
+static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq)
+{
+ struct hns_roce_wq *wq = &hr_qp->rq;
+
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+}
+
+static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
+ struct hns_roce_cq *cq)
+{
+ struct hns_roce_v2_cqe *cqe = cq->cqe;
+ struct hns_roce_srq *srq = NULL;
+ uint32_t wqe_idx;
+
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
+ cqe_proc_sq(qp, wqe_idx, cq);
+ } else {
+ if (get_srq_from_cqe(cqe, ctx, qp, &srq))
+ return V2_CQ_POLL_ERR;
+
+ if (srq)
+ cqe_proc_srq(srq, wqe_idx, cq);
+ else
+ cqe_proc_rq(qp, cq);
+ }
+ return 0;
+}
+
+static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status)
+{
+ int i;
+
+ static const struct {
+ unsigned int cqe_status;
+ enum ibv_wc_status wc_status;
+ } map[] = {
+ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
+ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
+ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
+ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR },
+ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR },
+ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR },
+ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR },
+ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR,
+ IBV_WC_RETRY_EXC_ERR },
+ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
+ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
+ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
+ };
+
+ cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR;
+ for (i = 0; i < ARRAY_SIZE(map); i++) {
+ if (status == map[i].cqe_status) {
+ cq->verbs_cq.cq_ex.status = map[i].wc_status;
+ break;
+ }
+ }
+}
+
+static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
+{
+ struct hns_roce_qp *qp = NULL;
+ struct hns_roce_v2_cqe *cqe;
+ uint8_t status;
+ uint32_t qpn;
+
+ cqe = next_cqe_sw_v2(cq);
+ if (!cqe)
+ return ENOENT;
+
+ ++cq->cons_index;
+ udma_from_device_barrier();
+
+ cq->cqe = cqe;
+ qpn = hr_reg_read(cqe, CQE_LCL_QPN);
+
+ qp = hns_roce_v2_find_qp(ctx, qpn);
+ if (!qp)
+ return V2_CQ_POLL_ERR;
+
+ if (cqe_proc_wq(ctx, qp, cq))
+ return V2_CQ_POLL_ERR;
+
+ status = hr_reg_read(cqe, CQE_STATUS);
+
+ /*
+ * once a cqe is in error status, the driver needs to help the HW to
+ * generate flushed cqes for all subsequent wqes
+ */
+ if (status != HNS_ROCE_V2_CQE_SUCCESS) {
+ handle_error_cqe_ex(cq, status);
+ return hns_roce_flush_cqe(qp, status);
+ }
+
+ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS;
+
+ return V2_CQ_OK;
+}
+
+static int wc_start_poll_cq(struct ibv_cq_ex *current,
+ struct ibv_poll_cq_attr *attr)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
+ int err;
+
+ if (attr->comp_mask)
+ return EINVAL;
+
+ pthread_spin_lock(&cq->lock);
+
+ err = wc_poll_cqe(ctx, cq);
+ if (err != V2_CQ_OK)
+ pthread_spin_unlock(&cq->lock);
+
+ return err;
+}
+
+static int wc_next_poll_cq(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
+ int err;
+
+ err = wc_poll_cqe(ctx, cq);
+ if (err != V2_CQ_OK)
+ return err;
+
+ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
+ else
+ update_cq_db(ctx, cq);
+
+ return 0;
+}
+
+static void wc_end_poll_cq(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
+
+ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
+ else
+ update_cq_db(ctx, cq);
+
+ pthread_spin_unlock(&cq->lock);
+}
+
+static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE);
+
+ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ)
+ return wc_send_op_map[opcode];
+ else
+ return wc_rcv_op_map[opcode];
+}
+
+static uint32_t wc_read_vendor_err(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+
+ return hr_reg_read(cq->cqe, CQE_SUB_STATUS);
+}
+
+static uint32_t wc_read_byte_len(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+
+ return le32toh(cq->cqe->byte_cnt);
+}
+
+static __be32 wc_read_imm_data(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+
+ if (hr_reg_read(cq->cqe, CQE_OPCODE) == HNS_ROCE_RECV_OP_SEND_WITH_INV)
+ /* This is returning invalidate_rkey which is in host order, see
+ * ibv_wc_read_invalidated_rkey.
+ */
+ return (__force __be32)le32toh(cq->cqe->rkey);
+
+ return htobe32(le32toh(cq->cqe->immtdata));
+}
+
+static uint32_t wc_read_qp_num(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+
+ return hr_reg_read(cq->cqe, CQE_LCL_QPN);
+}
+
+static uint32_t wc_read_src_qp(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+
+ return hr_reg_read(cq->cqe, CQE_RMT_QPN);
+}
+
+static unsigned int get_wc_flags_for_sq(uint8_t opcode)
+{
+ switch (opcode) {
+ case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
+ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
+ return IBV_WC_WITH_IMM;
+ case HNS_ROCE_SQ_OP_LOCAL_INV:
+ return IBV_WC_WITH_INV;
+ default:
+ return 0;
+ }
+}
+
+static unsigned int get_wc_flags_for_rq(uint8_t opcode)
+{
+ switch (opcode) {
+ case HNS_ROCE_RECV_OP_RDMA_WRITE_IMM:
+ case HNS_ROCE_RECV_OP_SEND_WITH_IMM:
+ return IBV_WC_WITH_IMM;
+ case HNS_ROCE_RECV_OP_SEND_WITH_INV:
+ return IBV_WC_WITH_INV;
+ default:
+ return 0;
+ }
+}
+
+static unsigned int wc_read_wc_flags(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE);
+ unsigned int wc_flags;
+
+ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ) {
+ wc_flags = get_wc_flags_for_sq(opcode);
+ } else {
+ wc_flags = get_wc_flags_for_rq(opcode);
+ wc_flags |= hr_reg_read(cq->cqe, CQE_GRH) ? IBV_WC_GRH : 0;
+ }
+
+ return wc_flags;
+}
+
+static uint32_t wc_read_slid(struct ibv_cq_ex *current)
+{
+ return 0;
+}
+
+static uint8_t wc_read_sl(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+
+ return (uint8_t)hr_reg_read(cq->cqe, CQE_SL);
+}
+
+static uint8_t wc_read_dlid_path_bits(struct ibv_cq_ex *current)
+{
+ return 0;
+}
+
+static uint16_t wc_read_cvlan(struct ibv_cq_ex *current)
+{
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+
+ return hr_reg_read(cq->cqe, CQE_VID_VLD) ?
+ hr_reg_read(cq->cqe, CQE_VID) : 0;
+}
+
+void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags)
+{
+ cq_ex->start_poll = wc_start_poll_cq;
+ cq_ex->next_poll = wc_next_poll_cq;
+ cq_ex->end_poll = wc_end_poll_cq;
+ cq_ex->read_opcode = wc_read_opcode;
+ cq_ex->read_vendor_err = wc_read_vendor_err;
+ cq_ex->read_wc_flags = wc_read_wc_flags;
+
+ if (wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
+ cq_ex->read_byte_len = wc_read_byte_len;
+ if (wc_flags & IBV_WC_EX_WITH_IMM)
+ cq_ex->read_imm_data = wc_read_imm_data;
+ if (wc_flags & IBV_WC_EX_WITH_QP_NUM)
+ cq_ex->read_qp_num = wc_read_qp_num;
+ if (wc_flags & IBV_WC_EX_WITH_SRC_QP)
+ cq_ex->read_src_qp = wc_read_src_qp;
+ if (wc_flags & IBV_WC_EX_WITH_SLID)
+ cq_ex->read_slid = wc_read_slid;
+ if (wc_flags & IBV_WC_EX_WITH_SL)
+ cq_ex->read_sl = wc_read_sl;
+ if (wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
+ cq_ex->read_dlid_path_bits = wc_read_dlid_path_bits;
+ if (wc_flags & IBV_WC_EX_WITH_CVLAN)
+ cq_ex->read_cvlan = wc_read_cvlan;
+}
+
const struct hns_roce_u_hw hns_roce_u_hw_v2 = {
.hw_version = HNS_ROCE_HW_VER2,
.hw_ops = {
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 92e5f1a4..0068f4fe 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -337,5 +337,6 @@ struct hns_roce_ud_sq_wqe {
#define MAX_SERVICE_LEVEL 0x7
void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
+void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags);
#endif /* _HNS_ROCE_U_HW_V2_H */
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index a993c39a..9ea8a6d3 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -276,13 +276,21 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
return 0;
}
+enum {
+ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
+ IBV_WC_EX_WITH_CVLAN,
+};
+
static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
struct hns_roce_context *context)
{
if (!attr->cqe || attr->cqe > context->max_cqe)
return -EINVAL;
- if (attr->comp_mask || attr->wc_flags)
+ if (attr->comp_mask)
+ return -EOPNOTSUPP;
+
+ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
return -EOPNOTSUPP;
attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
@@ -409,7 +417,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context,
struct ibv_cq_init_attr_ex *attr)
{
- return create_cq(context, attr);
+ struct ibv_cq_ex *cq;
+
+ cq = create_cq(context, attr);
+ if (cq)
+ hns_roce_attach_cq_ex_ops(cq, attr->wc_flags);
+
+ return cq;
}
void hns_roce_u_cq_event(struct ibv_cq *cq)
--
2.30.0
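For context on how these ops are consumed (generic libibverbs API, not code from this patch): an application creates the CQ with, e.g., IBV_WC_EX_WITH_BYTE_LEN in wc_flags and then drives start_poll/next_poll/end_poll through the standard wrappers. A minimal sketch, assuming cq_ex was created successfully:

    #include <errno.h>
    #include <stdio.h>
    #include <infiniband/verbs.h>

    static void drain_cq(struct ibv_cq_ex *cq_ex)
    {
        struct ibv_poll_cq_attr attr = {};  /* comp_mask must stay 0 for hns */
        int ret = ibv_start_poll(cq_ex, &attr);

        if (ret)        /* ENOENT just means the CQ is empty */
            return;

        do {
            printf("wr_id=%llu status=%s bytes=%u\n",
                   (unsigned long long)cq_ex->wr_id,
                   ibv_wc_status_str(cq_ex->status),
                   ibv_wc_read_byte_len(cq_ex));
            ret = ibv_next_poll(cq_ex);
        } while (!ret);

        ibv_end_poll(cq_ex);    /* releases the lock taken by start_poll */
    }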


@ -1,160 +0,0 @@
From 2d48954e9b2617cb48f7d5ba47a10ceda4e556ff Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Mon, 7 Mar 2022 18:49:37 +0800
Subject: libhns: Optimize the error handling of CQE
Separate the acquisition of wc->status and wc->vendor_err to make the logic
of error handling clearer.
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 81 ++++++++------------------------
1 file changed, 19 insertions(+), 62 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 081ab1f3..2804450d 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -146,13 +146,13 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr,
return 0;
}
-static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
- uint8_t status)
+static enum ibv_wc_status get_wc_status(uint8_t status)
{
static const struct {
unsigned int cqe_status;
enum ibv_wc_status wc_status;
} map[] = {
+ { HNS_ROCE_V2_CQE_SUCCESS, IBV_WC_SUCCESS },
{ HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
{ HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
{ HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
@@ -169,17 +169,12 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
{ HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
};
- int i;
-
- wc->status = IBV_WC_GENERAL_ERR;
- for (i = 0; i < ARRAY_SIZE(map); i++) {
- if (status == map[i].cqe_status) {
- wc->status = map[i].wc_status;
- break;
- }
+ for (int i = 0; i < ARRAY_SIZE(map); i++) {
+ if (status == map[i].cqe_status)
+ return map[i].wc_status;
}
- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
+ return IBV_WC_GENERAL_ERR;
}
static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
@@ -581,7 +576,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
return V2_CQ_POLL_ERR;
}
- status = hr_reg_read(cqe, CQE_STATUS);
opcode = hr_reg_read(cqe, CQE_OPCODE);
is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ;
if (is_send) {
@@ -603,18 +597,18 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
wc->qp_num = qpn;
+ status = hr_reg_read(cqe, CQE_STATUS);
+ wc->status = get_wc_status(status);
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
+
+ if (status == HNS_ROCE_V2_CQE_SUCCESS)
+ return V2_CQ_OK;
+
/*
* once a cqe is in error status, the driver needs to help the HW to
* generate flushed cqes for all subsequent wqes
*/
- if (status != HNS_ROCE_V2_CQE_SUCCESS) {
- handle_error_cqe(cqe, wc, status);
- return hns_roce_flush_cqe(*cur_qp, status);
- }
-
- wc->status = IBV_WC_SUCCESS;
-
- return V2_CQ_OK;
+ return hns_roce_flush_cqe(*cur_qp, status);
}
static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
@@ -1706,40 +1700,6 @@ static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
return 0;
}
-static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status)
-{
- int i;
-
- static const struct {
- unsigned int cqe_status;
- enum ibv_wc_status wc_status;
- } map[] = {
- { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
- { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
- { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
- { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR },
- { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR },
- { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR },
- { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR },
- { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR },
- { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR },
- { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR },
- { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR,
- IBV_WC_RETRY_EXC_ERR },
- { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
- { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
- { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
- };
-
- cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR;
- for (i = 0; i < ARRAY_SIZE(map); i++) {
- if (status == map[i].cqe_status) {
- cq->verbs_cq.cq_ex.status = map[i].wc_status;
- break;
- }
- }
-}
-
static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
{
struct hns_roce_qp *qp = NULL;
@@ -1765,19 +1725,16 @@ static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
return V2_CQ_POLL_ERR;
status = hr_reg_read(cqe, CQE_STATUS);
+ cq->verbs_cq.cq_ex.status = get_wc_status(status);
+
+ if (status == HNS_ROCE_V2_CQE_SUCCESS)
+ return V2_CQ_OK;
/*
* once a cqe is in error status, the driver needs to help the HW to
* generate flushed cqes for all subsequent wqes
*/
- if (status != HNS_ROCE_V2_CQE_SUCCESS) {
- handle_error_cqe_ex(cq, status);
- return hns_roce_flush_cqe(qp, status);
- }
-
- cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS;
-
- return V2_CQ_OK;
+ return hns_roce_flush_cqe(qp, status);
}
static int wc_start_poll_cq(struct ibv_cq_ex *current,
--
2.30.0
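The consumer-visible contract is unchanged by this cleanup: wc->status carries the mapped IBV status and wc->vendor_err the raw CQE sub-status. A small sketch of the usual application-side check (not from the patch):

    #include <stdio.h>
    #include <infiniband/verbs.h>

    static int check_one_completion(struct ibv_cq *cq)
    {
        struct ibv_wc wc;
        int n = ibv_poll_cq(cq, 1, &wc);

        if (n > 0 && wc.status != IBV_WC_SUCCESS)
            fprintf(stderr, "wr %llu failed: %s (vendor_err 0x%x)\n",
                    (unsigned long long)wc.wr_id,
                    ibv_wc_status_str(wc.status), wc.vendor_err);
        return n;   /* <0 on poll error, 0 if empty, 1 if one WC consumed */
    }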


@ -1,323 +0,0 @@
From 9dd7b55957ccc720a6844613af9d43680d8fbaad Mon Sep 17 00:00:00 2001
From: Xinhao Liu <liuxinhao5@hisilicon.com>
Date: Mon, 7 Mar 2022 18:49:38 +0800
Subject: libhns: Refactor hns_roce_v2_poll_one() and wc_poll_cqe()
hns_roce_v2_poll_one() and wc_poll_cqe() share a lot of duplicated code.
Aggregating the common parts of these two functions into a single helper,
hns_roce_poll_one(), removes the duplication.
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 228 +++++++++++++++----------------
1 file changed, 107 insertions(+), 121 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 2804450d..42a77151 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -285,6 +285,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
struct hns_roce_qp *qp)
+
{
struct hns_roce_db sq_db = {};
@@ -548,21 +549,101 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
wc->opcode = wc_send_op_map[opcode];
}
-static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
- struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
+static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
+ struct hns_roce_cq *cq)
{
- struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
+ struct hns_roce_wq *wq = &hr_qp->sq;
+
+ if (hr_qp->sq_signal_bits)
+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
+
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+}
+
+static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
+ struct hns_roce_cq *cq)
+{
+ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
+ hns_roce_free_srq_wqe(srq, wqe_idx);
+}
+
+static void cqe_proc_rq(struct hns_roce_wq *wq, struct hns_roce_cq *cq)
+{
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+}
+
+static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
+ struct hns_roce_cq *cq)
+{
+ struct hns_roce_v2_cqe *cqe = cq->cqe;
+ struct hns_roce_srq *srq = NULL;
+ uint32_t wqe_idx;
+
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
+ cqe_proc_sq(qp, wqe_idx, cq);
+ } else {
+ if (get_srq_from_cqe(cqe, ctx, qp, &srq))
+ return V2_CQ_POLL_ERR;
+
+ if (srq)
+ cqe_proc_srq(srq, wqe_idx, cq);
+ else
+ cqe_proc_rq(&qp->rq, cq);
+ }
+
+ return 0;
+}
+
+static int parse_cqe_for_cq(struct hns_roce_context *ctx, struct hns_roce_cq *cq,
+ struct hns_roce_qp *cur_qp, struct ibv_wc *wc)
+{
+ struct hns_roce_v2_cqe *cqe = cq->cqe;
struct hns_roce_srq *srq = NULL;
- struct hns_roce_v2_cqe *cqe;
uint8_t opcode;
- uint8_t status;
+
+ if (!wc) {
+ if (cqe_proc_wq(ctx, cur_qp, cq))
+ return V2_CQ_POLL_ERR;
+
+ return 0;
+ }
+
+ opcode = hr_reg_read(cqe, CQE_OPCODE);
+
+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
+ parse_cqe_for_req(cqe, wc, cur_qp, opcode);
+ } else {
+ wc->byte_len = le32toh(cqe->byte_cnt);
+ get_opcode_for_resp(cqe, wc, opcode);
+
+ if (get_srq_from_cqe(cqe, ctx, cur_qp, &srq))
+ return V2_CQ_POLL_ERR;
+
+ if (srq)
+ parse_cqe_for_srq(cqe, wc, srq);
+ else
+ parse_cqe_for_resp(cqe, wc, cur_qp, opcode);
+ }
+
+ return 0;
+}
+
+static int hns_roce_poll_one(struct hns_roce_context *ctx,
+ struct hns_roce_qp **cur_qp, struct hns_roce_cq *cq,
+ struct ibv_wc *wc)
+{
+ struct hns_roce_v2_cqe *cqe;
+ uint8_t status, wc_status;
uint32_t qpn;
- bool is_send;
cqe = next_cqe_sw_v2(cq);
if (!cqe)
- return V2_CQ_EMPTY;
+ return wc ? V2_CQ_EMPTY : ENOENT;
+ cq->cqe = cqe;
++cq->cons_index;
udma_from_device_barrier();
@@ -576,31 +657,20 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
return V2_CQ_POLL_ERR;
}
- opcode = hr_reg_read(cqe, CQE_OPCODE);
- is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ;
- if (is_send) {
- parse_cqe_for_req(cqe, wc, *cur_qp, opcode);
- } else {
- wc->byte_len = le32toh(cqe->byte_cnt);
- get_opcode_for_resp(cqe, wc, opcode);
+ if (parse_cqe_for_cq(ctx, cq, *cur_qp, wc))
+ return V2_CQ_POLL_ERR;
- if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq))
- return V2_CQ_POLL_ERR;
+ status = hr_reg_read(cqe, CQE_STATUS);
+ wc_status = get_wc_status(status);
- if (srq) {
- parse_cqe_for_srq(cqe, wc, srq);
- } else {
- if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode))
- return V2_CQ_POLL_ERR;
- }
+ if (wc) {
+ wc->status = wc_status;
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
+ wc->qp_num = qpn;
+ } else {
+ cq->verbs_cq.cq_ex.status = wc_status;
}
- wc->qp_num = qpn;
-
- status = hr_reg_read(cqe, CQE_STATUS);
- wc->status = get_wc_status(status);
- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
-
if (status == HNS_ROCE_V2_CQE_SUCCESS)
return V2_CQ_OK;
@@ -614,16 +684,16 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
struct ibv_wc *wc)
{
- int npolled;
- int err = V2_CQ_OK;
- struct hns_roce_qp *qp = NULL;
- struct hns_roce_cq *cq = to_hr_cq(ibvcq);
struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
+ struct hns_roce_cq *cq = to_hr_cq(ibvcq);
+ struct hns_roce_qp *qp = NULL;
+ int err = V2_CQ_OK;
+ int npolled;
pthread_spin_lock(&cq->lock);
for (npolled = 0; npolled < ne; ++npolled) {
- err = hns_roce_v2_poll_one(cq, &qp, wc + npolled);
+ err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
if (err != V2_CQ_OK)
break;
}
@@ -1651,97 +1721,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
return ret;
}
-static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
- struct hns_roce_cq *cq)
-{
- struct hns_roce_wq *wq = &hr_qp->sq;
-
- if (hr_qp->sq_signal_bits)
- wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
-
- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
-}
-
-static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
- struct hns_roce_cq *cq)
-{
- cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
- hns_roce_free_srq_wqe(srq, wqe_idx);
-}
-
-static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq)
-{
- struct hns_roce_wq *wq = &hr_qp->rq;
-
- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
-}
-
-static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
- struct hns_roce_cq *cq)
-{
- struct hns_roce_v2_cqe *cqe = cq->cqe;
- struct hns_roce_srq *srq = NULL;
- uint32_t wqe_idx;
-
- wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
- if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
- cqe_proc_sq(qp, wqe_idx, cq);
- } else {
- if (get_srq_from_cqe(cqe, ctx, qp, &srq))
- return V2_CQ_POLL_ERR;
-
- if (srq)
- cqe_proc_srq(srq, wqe_idx, cq);
- else
- cqe_proc_rq(qp, cq);
- }
- return 0;
-}
-
-static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
-{
- struct hns_roce_qp *qp = NULL;
- struct hns_roce_v2_cqe *cqe;
- uint8_t status;
- uint32_t qpn;
-
- cqe = next_cqe_sw_v2(cq);
- if (!cqe)
- return ENOENT;
-
- ++cq->cons_index;
- udma_from_device_barrier();
-
- cq->cqe = cqe;
- qpn = hr_reg_read(cqe, CQE_LCL_QPN);
-
- qp = hns_roce_v2_find_qp(ctx, qpn);
- if (!qp)
- return V2_CQ_POLL_ERR;
-
- if (cqe_proc_wq(ctx, qp, cq))
- return V2_CQ_POLL_ERR;
-
- status = hr_reg_read(cqe, CQE_STATUS);
- cq->verbs_cq.cq_ex.status = get_wc_status(status);
-
- if (status == HNS_ROCE_V2_CQE_SUCCESS)
- return V2_CQ_OK;
-
- /*
- * once a cqe is in error status, the driver needs to help the HW to
- * generate flushed cqes for all subsequent wqes
- */
- return hns_roce_flush_cqe(qp, status);
-}
-
static int wc_start_poll_cq(struct ibv_cq_ex *current,
struct ibv_poll_cq_attr *attr)
{
struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
struct hns_roce_context *ctx = to_hr_ctx(current->context);
+ struct hns_roce_qp *qp = NULL;
int err;
if (attr->comp_mask)
@@ -1749,7 +1734,7 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
pthread_spin_lock(&cq->lock);
- err = wc_poll_cqe(ctx, cq);
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
if (err != V2_CQ_OK)
pthread_spin_unlock(&cq->lock);
@@ -1760,9 +1745,10 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
{
struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
struct hns_roce_context *ctx = to_hr_ctx(current->context);
+ struct hns_roce_qp *qp = NULL;
int err;
- err = wc_poll_cqe(ctx, cq);
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
if (err != V2_CQ_OK)
return err;
--
2.30.0
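The refactor hinges on one convention visible above: the shared worker takes an optional struct ibv_wc * and, when it is NULL, reports through the extended-CQ fields instead. A hypothetical distillation of that pattern (toy names, not the driver's code):

    #include <errno.h>

    struct toy_wc { int status; unsigned long wr_id; };
    struct toy_cq { struct toy_wc pending; int has_pending; struct toy_wc *cur; };

    #define TOY_CQ_EMPTY 1  /* stand-in for V2_CQ_EMPTY */

    static int toy_poll_one(struct toy_cq *cq, struct toy_wc *wc)
    {
        if (!cq->has_pending)
            /* legacy callers expect V2_CQ_EMPTY, extended ones ENOENT */
            return wc ? TOY_CQ_EMPTY : ENOENT;

        cq->has_pending = 0;
        if (wc)
            *wc = cq->pending;       /* legacy path: copy out immediately */
        else
            cq->cur = &cq->pending;  /* extended path: fields are read lazily */
        return 0;
    }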

File diff suppressed because it is too large.
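Judging by the spec file list further down, the suppressed diff is 0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch. For orientation only, the generic libibverbs side of that mechanism (none of this is the suppressed driver code) looks roughly like the sketch below; it assumes a QP created via ibv_create_qp_ex() with IBV_QP_INIT_ATTR_SEND_OPS_FLAGS set:

    #include <infiniband/verbs.h>

    static int send_one_inline(struct ibv_qp_ex *qpx, const void *msg, size_t len)
    {
        ibv_wr_start(qpx);
        qpx->wr_id = 1;
        qpx->wr_flags = IBV_SEND_SIGNALED;
        ibv_wr_send(qpx);
        ibv_wr_set_inline_data(qpx, (void *)msg, len);
        return ibv_wr_complete(qpx);    /* flushes the queued WRs to hardware */
    }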


@ -1,58 +0,0 @@
From 1e5f8bb89169453cfdd17bf58cef7186dcf58596 Mon Sep 17 00:00:00 2001
From: Youming Luo <luoyouming@huawei.com>
Date: Wed, 16 Mar 2022 17:36:39 +0800
Subject: libhns: Add general error type for CQE
If a Work Request posted to the RQ of a UD QP isn't big enough to hold the
incoming message, the hns ROCEE will generate a general error CQE. The
IB specification does not define this type of CQE.
For unreliable communication it is not desirable to move the QP to the error
state just because of an insufficient receive length. So if the hns ROCEE
reports a general error CQE, there is no need to set the QP to the error
state, and the driver should simply skip it.
Signed-off-by: Youming Luo <luoyouming@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 4 +++-
providers/hns/hns_roce_u_hw_v2.h | 1 +
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 42a77151..fab1939b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -166,6 +166,7 @@ static enum ibv_wc_status get_wc_status(uint8_t status)
{ HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR },
{ HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
{ HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
+ { HNS_ROCE_V2_CQE_GENERAL_ERR, IBV_WC_GENERAL_ERR },
{ HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
};
@@ -671,7 +672,8 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx,
cq->verbs_cq.cq_ex.status = wc_status;
}
- if (status == HNS_ROCE_V2_CQE_SUCCESS)
+ if (status == HNS_ROCE_V2_CQE_SUCCESS ||
+ status == HNS_ROCE_V2_CQE_GENERAL_ERR)
return V2_CQ_OK;
/*
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 0068f4fe..122fdbdf 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -110,6 +110,7 @@ enum {
HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR = 0x15,
HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR = 0x16,
HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR = 0x22,
+ HNS_ROCE_V2_CQE_GENERAL_ERR = 0x23,
HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR = 0x24,
};
--
2.30.0
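Since the typical trigger for this CQE is a UD receive buffer that is too small, and UD receives must additionally reserve 40 bytes for the GRH, a receiver avoids it by sizing buffers for the largest expected message. A minimal sketch (not from the patch), assuming qp, mr, and buf are already set up and buf lies inside mr; MAX_PAYLOAD is an assumption:

    #include <stdint.h>
    #include <infiniband/verbs.h>

    enum { GRH_LEN = 40, MAX_PAYLOAD = 4096 };

    static int post_ud_recv(struct ibv_qp *qp, struct ibv_mr *mr, void *buf)
    {
        struct ibv_sge sge = {
            .addr   = (uintptr_t)buf,
            .length = GRH_LEN + MAX_PAYLOAD, /* GRH + largest expected message */
            .lkey   = mr->lkey,
        };
        struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
        struct ibv_recv_wr *bad;

        return ibv_post_recv(qp, &wr, &bad);
    }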


@ -1,34 +0,0 @@
From 847336b7634b51548996b879f42c786a108885f1 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Fri, 8 Apr 2022 11:31:07 +0800
Subject: [PATCH 46/47] libhns: Fix the shift size of SQ WQE
Currently, the SQ WQE shift is derived from the size of the HIP06 SQ WQE
structure. Although the HIP08 SQ WQE happens to be the same size, it is not
correct for HIP08 to define its SQ WQE size in terms of the HIP06 structure.
Fixes: b6cd213b276f ("libhns: Refactor for creating qp")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 1457a1a2..215d82ec 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1068,8 +1068,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
}
if (attr->cap.max_send_wr) {
- qp->sq.wqe_shift =
- hr_ilog32(sizeof(struct hns_roce_rc_send_wqe));
+ qp->sq.wqe_shift = HNS_ROCE_SQWQE_SHIFT;
cnt = roundup_pow_of_two(attr->cap.max_send_wr);
qp->sq.wqe_cnt = cnt;
qp->sq.shift = hr_ilog32(cnt);
--
2.30.0
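For reference, a WQE "shift" is just log2 of the WQE stride, so the n-th WQE's offset is computed with a single shift. A sketch of the arithmetic, assuming the 64-byte SQ WQE implied by the commit message (hence a shift of 6):

    #include <stdint.h>

    #define SQWQE_SHIFT 6   /* assumed: log2 of a 64-byte SQ WQE */

    /* address of the n-th WQE inside the SQ buffer */
    static inline void *get_sq_wqe(void *sq_buf, uint32_t n)
    {
        return (char *)sq_buf + ((uintptr_t)n << SQWQE_SHIFT);  /* n * 64 */
    }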

File diff suppressed because it is too large.


@ -1,27 +0,0 @@
From c381cfa26ba6163b9cc51212702e64bf1d83f838 Mon Sep 17 00:00:00 2001
From: swimlessbird <52704385+swimlessbird@users.noreply.github.com>
Date: Fri, 17 Sep 2021 14:35:05 +0800
Subject: [PATCH] ibdiags: Increase maximum number of CPUs
On modern systems the old limit (8) is too small, so increase it to
something larger (256).
Signed-off-by: Suwan Sun <swimlessbird@gmail.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
infiniband-diags/ibsysstat.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/infiniband-diags/ibsysstat.c b/infiniband-diags/ibsysstat.c
index 6ff7ca0c4..73972d039 100644
--- a/infiniband-diags/ibsysstat.c
+++ b/infiniband-diags/ibsysstat.c
@@ -41,7 +41,7 @@
#include "ibdiag_common.h"
-#define MAX_CPUS 8
+#define MAX_CPUS 256
static struct ibmad_port *srcport;

Binary file not shown. (old source tarball removed)
rdma-core-41.0.tar.gz (new binary file; content not shown)


@ -1,60 +1,11 @@
Name: rdma-core
-Version: 35.1
-Release: 7
+Version: 41.0
+Release: 1
Summary: RDMA core userspace libraries and daemons
License: GPLv2 or BSD
Url: https://github.com/linux-rdma/rdma-core
Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz
-Patch0: backport-fixbug-increase-maximum-number-of-cpus-rdma.patch
-Patch1: 0001-Update-kernel-headers.patch
-Patch2: 0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch
-Patch3: 0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch
-Patch4: 0004-libhns-Remove-unsupported-QP-type.patch
-Patch5: 0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch
-Patch6: 0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch
-Patch7: 0007-libhns-Add-support-for-direct-wqe.patch
-Patch8: 0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch
-Patch9: 0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch
-Patch10: 0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch
-Patch11: 0011-libhns-Refactor-the-process-of-post_srq_recv.patch
-Patch12: 0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch
-Patch13: 0013-libhns-Refactor-the-process-of-create_srq.patch
-Patch14: 0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch
-Patch15: 0015-libhns-Refactor-process-of-setting-extended-sge.patch
-Patch16: 0016-libhns-Optimize-set_sge-process.patch
-Patch17: 0017-verbs-Add-generic-logging-API.patch
-Patch18: 0018-libhns-Use-the-verbs-logging-API-instead-of-printf-f.patch
-Patch19: 0019-libhns-The-function-declaration-should-be-the-same-a.patch
-Patch20: 0020-libhns-The-content-of-the-header-file-should-be-prot.patch
-Patch21: 0021-libhns-Fix-wrong-type-of-variables-and-fields.patch
-Patch22: 0022-libhns-Fix-wrong-print-format-for-unsigned-type.patch
-Patch23: 0023-libhns-Remove-redundant-variable-initialization.patch
-Patch24: 0024-libhns-Remove-unused-macros.patch
-Patch25: 0025-libhns-Refactor-the-poll-one-interface.patch
-Patch26: 0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch
-Patch27: 0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch
-Patch28: 0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch
-Patch29: 0029-libhns-Add-vendor_err-information-for-error-WC.patch
-Patch30: 0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch
-Patch31: 0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch
-Patch32: 0032-util-Fix-mmio-memcpy-on-ARM.patch
-Patch33: 0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch
-Patch34: 0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch
-Patch35: 0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch
-Patch36: 0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch
-Patch37: 0037-libhns-Fix-wrong-HIP08-version-macro.patch
-Patch38: 0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch
-Patch39: 0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch
-Patch40: 0040-libhns-Add-support-for-creating-extended-CQ.patch
-Patch41: 0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch
-Patch42: 0042-libhns-Optimize-the-error-handling-of-CQE.patch
-Patch43: 0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch
-Patch44: 0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch
-Patch45: 0045-libhns-Add-general-error-type-for-CQE.patch
-Patch46: 0046-libhns-Fix-the-shift-size-of-SQ-WQE.patch
-Patch47: 0047-libhns-Remove-support-for-HIP06.patch
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
BuildRequires: python3-devel python3-Cython python3 python3-docutils perl-generators
@ -298,6 +249,12 @@ fi
%{_mandir}/*
%changelog
+* Tue Sep 27 2022 tangchengchang <tangchengchang@huawei.com> - 41.0-1
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: update to 41.0
* Tue Sep 06 2022 luozhengfeng <luozhengfeng@h-partners.com> - 35.1-7
- Type: bugfix
- ID: NA