!43 Update to 41.0
From: @hellotcc Reviewed-by: @li-yangyang20 Signed-off-by: @li-yangyang20
This commit is contained in:
commit
5e0943666e
@ -1,41 +0,0 @@
|
||||
From 693d55e80976217215844258e5b78bc115382689 Mon Sep 17 00:00:00 2001
|
||||
From: Guofeng Yue <yueguofeng@hisilicon.com>
|
||||
Date: Mon, 10 Jan 2022 10:44:23 +0800
|
||||
Subject: [PATCH 1/8] Update kernel headers
|
||||
|
||||
To commit 62c4d8878d13 ("RDMA/hns: Remove support for HIP06").
|
||||
|
||||
Signed-off-by: Guofeng Yue <yueguofeng@hisilicon.com>
|
||||
---
|
||||
kernel-headers/rdma/hns-abi.h | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
|
||||
index 42b17765..abfd36e2 100644
|
||||
--- a/kernel-headers/rdma/hns-abi.h
|
||||
+++ b/kernel-headers/rdma/hns-abi.h
|
||||
@@ -77,17 +77,19 @@ enum hns_roce_qp_cap_flags {
|
||||
HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
|
||||
HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
|
||||
HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
|
||||
+ HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
|
||||
};
|
||||
|
||||
struct hns_roce_ib_create_qp_resp {
|
||||
__aligned_u64 cap_flags;
|
||||
+ __aligned_u64 dwqe_mmap_key;
|
||||
};
|
||||
|
||||
struct hns_roce_ib_alloc_ucontext_resp {
|
||||
__u32 qp_tab_size;
|
||||
__u32 cqe_size;
|
||||
- __u32 srq_tab_size;
|
||||
- __u32 reserved;
|
||||
+ __u32 srq_tab_size;
|
||||
+ __u32 reserved;
|
||||
};
|
||||
|
||||
struct hns_roce_ib_alloc_pd_resp {
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,120 +0,0 @@
|
||||
From 08ec3c43bf9710fdf3ca664f7cd63436e67339d7 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:34 +0800
|
||||
Subject: [PATCH 2/8] libhns: Fix the ownership of the head/tail pointer of SRQ
|
||||
WQE
|
||||
|
||||
The CQE of SRQ is not generated in the order of wqe, so the wqe_idx
|
||||
corresponding to the idle WQE should be placed in a FIFO, then the hardware
|
||||
will be instructed to obtain the corresponding WQE. Therefore, the WQ
|
||||
of SRQ has no concept of head pointer and tail pointer, but the queue of
|
||||
wqe_idx does.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 4 ++--
|
||||
providers/hns/hns_roce_u_hw_v2.c | 12 ++++++------
|
||||
providers/hns/hns_roce_u_verbs.c | 6 +++---
|
||||
3 files changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 8f805dd1..b3f48113 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -205,6 +205,8 @@ struct hns_roce_idx_que {
|
||||
int entry_shift;
|
||||
unsigned long *bitmap;
|
||||
int bitmap_cnt;
|
||||
+ unsigned int head;
|
||||
+ unsigned int tail;
|
||||
};
|
||||
|
||||
struct hns_roce_srq {
|
||||
@@ -217,8 +219,6 @@ struct hns_roce_srq {
|
||||
unsigned int max_gs;
|
||||
unsigned int rsv_sge;
|
||||
unsigned int wqe_shift;
|
||||
- int head;
|
||||
- int tail;
|
||||
unsigned int *db;
|
||||
unsigned short counter;
|
||||
struct hns_roce_idx_que idx_que;
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 4988943a..f947dbd7 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -262,7 +262,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
|
||||
bitmap_num = ind / BIT_CNT_PER_LONG;
|
||||
bit_num = ind % BIT_CNT_PER_LONG;
|
||||
srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
|
||||
- srq->tail++;
|
||||
+ srq->idx_que.tail++;
|
||||
|
||||
pthread_spin_unlock(&srq->lock);
|
||||
}
|
||||
@@ -1564,7 +1564,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
pthread_spin_lock(&srq->lock);
|
||||
|
||||
/* current idx of srqwq */
|
||||
- ind = srq->head & (srq->wqe_cnt - 1);
|
||||
+ ind = srq->idx_que.head & (srq->wqe_cnt - 1);
|
||||
|
||||
max_sge = srq->max_gs - srq->rsv_sge;
|
||||
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||
@@ -1574,7 +1574,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
break;
|
||||
}
|
||||
|
||||
- if (srq->head == srq->tail) {
|
||||
+ if (srq->idx_que.head == srq->idx_que.tail) {
|
||||
ret = -ENOMEM;
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
@@ -1607,7 +1607,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
}
|
||||
|
||||
if (nreq) {
|
||||
- srq->head += nreq;
|
||||
+ srq->idx_que.head += nreq;
|
||||
|
||||
/*
|
||||
* Make sure that descriptors are written before
|
||||
@@ -1617,8 +1617,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
|
||||
srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S |
|
||||
srq->srqn);
|
||||
- srq_db.parameter =
|
||||
- htole32(srq->head & DB_PARAM_SRQ_PRODUCER_COUNTER_M);
|
||||
+ srq_db.parameter = htole32(srq->idx_que.head &
|
||||
+ DB_PARAM_SRQ_PRODUCER_COUNTER_M);
|
||||
|
||||
hns_roce_write64((uint32_t *)&srq_db, ctx,
|
||||
ROCEE_VF_DB_CFG0_OFFSET);
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 30ab072a..9b4934b9 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -491,6 +491,9 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
for (i = 0; i < idx_que->bitmap_cnt; ++i)
|
||||
idx_que->bitmap[i] = ~(0UL);
|
||||
|
||||
+ idx_que->head = 0;
|
||||
+ idx_que->tail = srq->wqe_cnt - 1;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -512,9 +515,6 @@ static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq)
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
- srq->head = 0;
|
||||
- srq->tail = srq->wqe_cnt - 1;
|
||||
-
|
||||
return 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,180 +0,0 @@
|
||||
From 9cc4c4b8d31b35428859ef626d4428fc393aace4 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Thu, 11 Nov 2021 21:08:35 +0800
|
||||
Subject: [PATCH 3/8] libhns: Fix wrong data type when writing doorbell
|
||||
|
||||
The DB data is a __le32[] value instead of uint32_t[], and the DB register
|
||||
should be written with little-endian data instead of a uint64_t.
|
||||
|
||||
Fixes: 1523fbb1ea8e ("libhns: Add verbs of cq support")
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_db.h | 14 ++++----------
|
||||
providers/hns/hns_roce_u_hw_v1.c | 17 +++++++++--------
|
||||
providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++++++-----------
|
||||
3 files changed, 25 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
|
||||
index b44e64d4..13df9b52 100644
|
||||
--- a/providers/hns/hns_roce_u_db.h
|
||||
+++ b/providers/hns/hns_roce_u_db.h
|
||||
@@ -32,23 +32,17 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
+#include <util/mmio.h>
|
||||
#include "hns_roce_u.h"
|
||||
|
||||
#ifndef _HNS_ROCE_U_DB_H
|
||||
#define _HNS_ROCE_U_DB_H
|
||||
|
||||
-#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
-#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[1] << 32 | val[0])
|
||||
-#elif __BYTE_ORDER == __BIG_ENDIAN
|
||||
-#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[0] << 32 | val[1])
|
||||
-#else
|
||||
-#error __BYTE_ORDER not defined
|
||||
-#endif
|
||||
+#define HNS_ROCE_WORD_NUM 2
|
||||
|
||||
-static inline void hns_roce_write64(uint32_t val[2],
|
||||
- struct hns_roce_context *ctx, int offset)
|
||||
+static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM])
|
||||
{
|
||||
- *(volatile uint64_t *) (ctx->uar + offset) = HNS_ROCE_PAIR_TO_64(val);
|
||||
+ mmio_write64_le(dest, *(__le64 *)val);
|
||||
}
|
||||
|
||||
void *hns_roce_alloc_db(struct hns_roce_context *ctx,
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 8f0a71aa..14ee4817 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -65,7 +65,7 @@ static void hns_roce_update_rq_head(struct hns_roce_context *ctx,
|
||||
|
||||
udma_to_device_barrier();
|
||||
|
||||
- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&rq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_update_sq_head(struct hns_roce_context *ctx,
|
||||
@@ -84,7 +84,7 @@ static void hns_roce_update_sq_head(struct hns_roce_context *ctx,
|
||||
|
||||
udma_to_device_barrier();
|
||||
|
||||
- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_DB_SQ_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_SQ_L_0_REG, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
@@ -102,7 +102,7 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
CQ_DB_U32_4_CONS_IDX_S,
|
||||
cq->cons_index & ((cq->cq_depth << 1) - 1));
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe,
|
||||
@@ -422,10 +422,11 @@ static int hns_roce_u_v1_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
*/
|
||||
static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
{
|
||||
- uint32_t ci;
|
||||
- uint32_t solicited_flag;
|
||||
- struct hns_roce_cq_db cq_db = {};
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
|
||||
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
|
||||
+ struct hns_roce_cq_db cq_db = {};
|
||||
+ uint32_t solicited_flag;
|
||||
+ uint32_t ci;
|
||||
|
||||
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
|
||||
solicited_flag = solicited ? HNS_ROCE_CQ_DB_REQ_SOL :
|
||||
@@ -441,8 +442,8 @@ static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M,
|
||||
CQ_DB_U32_4_CONS_IDX_S, ci);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context),
|
||||
- ROCEE_DB_OTHERS_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index f947dbd7..efd949f4 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -293,7 +293,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
|
||||
HNS_ROCE_V2_RQ_DB);
|
||||
rq_db.parameter = htole32(rq_head);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
sq_db.parameter = htole32(sq_head);
|
||||
roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
@@ -325,7 +325,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M,
|
||||
DB_PARAM_CQ_CMD_SN_S, 1);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
|
||||
}
|
||||
|
||||
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
|
||||
@@ -659,11 +659,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
|
||||
static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
{
|
||||
- uint32_t ci;
|
||||
- uint32_t cmd_sn;
|
||||
- uint32_t solicited_flag;
|
||||
- struct hns_roce_db cq_db = {};
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
|
||||
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
|
||||
+ struct hns_roce_db cq_db = {};
|
||||
+ uint32_t solicited_flag;
|
||||
+ uint32_t cmd_sn;
|
||||
+ uint32_t ci;
|
||||
|
||||
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
|
||||
cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK;
|
||||
@@ -681,8 +682,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
DB_PARAM_CQ_CMD_SN_S, cmd_sn);
|
||||
roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context),
|
||||
- ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1620,8 +1621,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
srq_db.parameter = htole32(srq->idx_que.head &
|
||||
DB_PARAM_SRQ_PRODUCER_COUNTER_M);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&srq_db, ctx,
|
||||
- ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
|
||||
+ (__le32 *)&srq_db);
|
||||
}
|
||||
|
||||
pthread_spin_unlock(&srq->lock);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
From 60d82566fc94b11280be26733bc306e6af3d2697 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 9 Nov 2021 20:40:58 +0800
|
||||
Subject: [PATCH 4/8] libhns: Remove unsupported QP type
|
||||
|
||||
Currently, user space does not support UC type QP.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v1.c | 1 -
|
||||
providers/hns/hns_roce_u_hw_v2.c | 3 +--
|
||||
2 files changed, 1 insertion(+), 3 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 14ee4817..279c9b0f 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -532,7 +532,6 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||
ctrl->flag |= htole32(ps_opcode);
|
||||
wqe += sizeof(struct hns_roce_wqe_raddr_seg);
|
||||
break;
|
||||
- case IBV_QPT_UC:
|
||||
case IBV_QPT_UD:
|
||||
default:
|
||||
break;
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index efd949f4..c62f74b5 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -460,8 +460,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
|
||||
struct hns_roce_qp **cur_qp,
|
||||
struct ibv_wc *wc, uint32_t opcode)
|
||||
{
|
||||
- if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC ||
|
||||
- (*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_UC) &&
|
||||
+ if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) &&
|
||||
(opcode == HNS_ROCE_RECV_OP_SEND ||
|
||||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM ||
|
||||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) &&
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,67 +0,0 @@
|
||||
From e460a4208d1821b1477e621ad5a7b72068e844f9 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:32 +0800
|
||||
Subject: [PATCH 5/8] libhns: Avoid using WQE indexes that exceed the SRQ size
|
||||
|
||||
The index of SRQ WQE got from bitmap may be greater than the capability,
|
||||
so a check for that should be added.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++------
|
||||
1 file changed, 14 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index c62f74b5..1169b64b 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1527,8 +1527,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static int find_empty_entry(struct hns_roce_idx_que *idx_que)
|
||||
+static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
{
|
||||
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
int bit_num;
|
||||
int i;
|
||||
|
||||
@@ -1536,12 +1537,20 @@ static int find_empty_entry(struct hns_roce_idx_que *idx_que)
|
||||
for (i = 0; i < idx_que->bitmap_cnt && idx_que->bitmap[i] == 0; ++i)
|
||||
;
|
||||
if (i == idx_que->bitmap_cnt)
|
||||
- return ENOMEM;
|
||||
+ return -ENOMEM;
|
||||
|
||||
bit_num = ffsl(idx_que->bitmap[i]);
|
||||
idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
|
||||
|
||||
- return i * BIT_CNT_PER_LONG + (bit_num - 1);
|
||||
+ *wqe_idx = i * BIT_CNT_PER_LONG + (bit_num - 1);
|
||||
+
|
||||
+ /* If wqe_cnt is less than BIT_CNT_PER_LONG, wqe_idx may be greater
|
||||
+ * than wqe_cnt.
|
||||
+ */
|
||||
+ if (*wqe_idx >= srq->wqe_cnt)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
@@ -1580,9 +1589,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
break;
|
||||
}
|
||||
|
||||
- wqe_idx = find_empty_entry(&srq->idx_que);
|
||||
- if (wqe_idx < 0 || wqe_idx >= srq->wqe_cnt) {
|
||||
- ret = -ENOMEM;
|
||||
+ ret = get_wqe_idx(srq, &wqe_idx);
|
||||
+ if (ret) {
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From 91034654bdb2fd6e1fce81b4c1aea41bb4b6bf98 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:33 +0800
|
||||
Subject: [PATCH 6/8] libhns: Don't create RQ for a QP that associated with a
|
||||
SRQ
|
||||
|
||||
If a QP is associated with a SRQ, its RQ should not be created.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 9b4934b9..125858d2 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -760,6 +760,11 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
|
||||
cap->max_recv_sge > ctx->max_sge)
|
||||
return -EINVAL;
|
||||
|
||||
+ if (attr->srq) {
|
||||
+ cap->max_recv_wr = 0;
|
||||
+ cap->max_recv_sge = 0;
|
||||
+ }
|
||||
+
|
||||
min_wqe_num = hr_dev->hw_version == HNS_ROCE_HW_VER1 ?
|
||||
HNS_ROCE_V1_MIN_WQE_NUM : HNS_ROCE_V2_MIN_WQE_NUM;
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,368 +0,0 @@
|
||||
From 64c66455fef1c908cc8f06a2b71aa2fd71806218 Mon Sep 17 00:00:00 2001
|
||||
From: Yixing Liu <liuyixing1@huawei.com>
|
||||
Date: Wed, 15 Dec 2021 16:42:30 +0800
|
||||
Subject: [PATCH 7/8] libhns: Add support for direct wqe
|
||||
|
||||
The current write wqe mechanism is to write to DDR first, and then notify
|
||||
the hardware through doorbell to read the data. Direct wqe is a mechanism
|
||||
to fill wqe directly into the hardware. In the case of light load, the wqe
|
||||
will be filled into the PCIe BAR space of the hardware, which saves one
|
||||
memory access operation and therefore reduces latency. SIMD instructions
|
||||
allow the CPU to write 512 bits at a time to device memory, so they can
|
||||
be used for posting direct wqe.
|
||||
|
||||
The process of post send of HIP08/09:
|
||||
|
||||
+-----------+
|
||||
| post send |
|
||||
+-----+-----+
|
||||
|
|
||||
+-----+-----+
|
||||
| write WQE |
|
||||
+-----+-----+
|
||||
|
|
||||
| udma_to_device_barrier()
|
||||
|
|
||||
+-----+-----+ Y +-----------+ N
|
||||
| HIP09 ? +------+ multi WR ?+-------------+
|
||||
+-----+-----+ +-----+-----+ |
|
||||
| N | Y |
|
||||
+-----+-----+ +-----+-----+ +--------+--------+
|
||||
| ring DB | | ring DB | |direct WQE (ST4) |
|
||||
+-----------+ +-----------+ +-----------------+
|
||||
|
||||
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 5 +++-
|
||||
providers/hns/hns_roce_u_hw_v2.c | 43 ++++++++++++++++++++++++++------
|
||||
providers/hns/hns_roce_u_hw_v2.h | 31 +++++++++++++----------
|
||||
providers/hns/hns_roce_u_verbs.c | 26 +++++++++++++++++--
|
||||
util/mmio.h | 27 +++++++++++++++++++-
|
||||
5 files changed, 107 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index b3f48113..37711363 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -80,6 +80,8 @@
|
||||
|
||||
#define INVALID_SGE_LENGTH 0x80000000
|
||||
|
||||
+#define HNS_ROCE_DWQE_PAGE_SIZE 65536
|
||||
+
|
||||
#define HNS_ROCE_ADDRESS_MASK 0xFFFFFFFF
|
||||
#define HNS_ROCE_ADDRESS_SHIFT 32
|
||||
|
||||
@@ -279,13 +281,14 @@ struct hns_roce_qp {
|
||||
struct hns_roce_sge_ex ex_sge;
|
||||
unsigned int next_sge;
|
||||
int port_num;
|
||||
- int sl;
|
||||
+ uint8_t sl;
|
||||
unsigned int qkey;
|
||||
enum ibv_mtu path_mtu;
|
||||
|
||||
struct hns_roce_rinl_buf rq_rinl_buf;
|
||||
unsigned long flags;
|
||||
int refcnt; /* specially used for XRC */
|
||||
+ void *dwqe_page;
|
||||
};
|
||||
|
||||
struct hns_roce_av {
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 1169b64b..f102fd61 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -33,6 +33,7 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
+#include <sys/mman.h>
|
||||
#include "hns_roce_u.h"
|
||||
#include "hns_roce_u_db.h"
|
||||
#include "hns_roce_u_hw_v2.h"
|
||||
@@ -297,20 +298,40 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
|
||||
}
|
||||
|
||||
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
- unsigned int qpn, unsigned int sl,
|
||||
- unsigned int sq_head)
|
||||
+ struct hns_roce_qp *qp)
|
||||
{
|
||||
struct hns_roce_db sq_db = {};
|
||||
|
||||
- sq_db.byte_4 = htole32(qpn);
|
||||
+ sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num);
|
||||
roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
|
||||
HNS_ROCE_V2_SQ_DB);
|
||||
- sq_db.parameter = htole32(sq_head);
|
||||
- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl);
|
||||
|
||||
+ sq_db.parameter = htole32(qp->sq.head);
|
||||
+ roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
|
||||
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
+static void hns_roce_write512(uint64_t *dest, uint64_t *val)
|
||||
+{
|
||||
+ mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
|
||||
+}
|
||||
+
|
||||
+static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
|
||||
+{
|
||||
+ struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
|
||||
+
|
||||
+ /* All kinds of DirectWQE have the same header field layout */
|
||||
+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1);
|
||||
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M,
|
||||
+ RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl);
|
||||
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M,
|
||||
+ RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT);
|
||||
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
|
||||
+ RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
|
||||
+
|
||||
+ hns_roce_write512(qp->dwqe_page, wqe);
|
||||
+}
|
||||
+
|
||||
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
struct hns_roce_cq *cq)
|
||||
{
|
||||
@@ -339,8 +360,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx,
|
||||
- struct hns_roce_qp *qp)
|
||||
+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
|
||||
{
|
||||
uint32_t qpn = qp->verbs_qp.qp.qp_num;
|
||||
uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
@@ -1196,6 +1216,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||
break;
|
||||
case IBV_QPT_UD:
|
||||
ret = set_ud_wqe(wqe, qp, wr, nreq, &sge_info);
|
||||
+ qp->sl = to_hr_ah(wr->wr.ud.ah)->av.sl;
|
||||
break;
|
||||
default:
|
||||
ret = EINVAL;
|
||||
@@ -1214,7 +1235,10 @@ out:
|
||||
|
||||
udma_to_device_barrier();
|
||||
|
||||
- hns_roce_update_sq_db(ctx, ibvqp->qp_num, qp->sl, qp->sq.head);
|
||||
+ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
|
||||
+ hns_roce_write_dwqe(qp, wqe);
|
||||
+ else
|
||||
+ hns_roce_update_sq_db(ctx, qp);
|
||||
|
||||
if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
|
||||
*(qp->sdb) = qp->sq.head & 0xffff;
|
||||
@@ -1506,6 +1530,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
|
||||
+ munmap(qp->dwqe_page, HNS_ROCE_DWQE_PAGE_SIZE);
|
||||
+
|
||||
hns_roce_v2_clear_qp(ctx, qp);
|
||||
|
||||
hns_roce_lock_cqs(ibqp);
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index c13d82e3..af72cd70 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -40,6 +40,8 @@
|
||||
|
||||
#define HNS_ROCE_CMDSN_MASK 0x3
|
||||
|
||||
+#define HNS_ROCE_SL_SHIFT 2
|
||||
+
|
||||
/* V2 REG DEFINITION */
|
||||
#define ROCEE_VF_DB_CFG0_OFFSET 0x0230
|
||||
|
||||
@@ -133,6 +135,8 @@ struct hns_roce_db {
|
||||
#define DB_BYTE_4_CMD_S 24
|
||||
#define DB_BYTE_4_CMD_M GENMASK(27, 24)
|
||||
|
||||
+#define DB_BYTE_4_FLAG_S 31
|
||||
+
|
||||
#define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0
|
||||
#define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0)
|
||||
|
||||
@@ -216,8 +220,16 @@ struct hns_roce_rc_sq_wqe {
|
||||
};
|
||||
|
||||
#define RC_SQ_WQE_BYTE_4_OPCODE_S 0
|
||||
-#define RC_SQ_WQE_BYTE_4_OPCODE_M \
|
||||
- (((1UL << 5) - 1) << RC_SQ_WQE_BYTE_4_OPCODE_S)
|
||||
+#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
|
||||
+
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
|
||||
+
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
|
||||
+
|
||||
+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15
|
||||
+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
|
||||
|
||||
#define RC_SQ_WQE_BYTE_4_OWNER_S 7
|
||||
|
||||
@@ -239,6 +251,8 @@ struct hns_roce_rc_sq_wqe {
|
||||
|
||||
#define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22
|
||||
|
||||
+#define RC_SQ_WQE_BYTE_4_FLAG_S 31
|
||||
+
|
||||
#define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0
|
||||
#define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \
|
||||
(((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S)
|
||||
@@ -311,23 +325,12 @@ struct hns_roce_ud_sq_wqe {
|
||||
#define UD_SQ_WQE_OPCODE_S 0
|
||||
#define UD_SQ_WQE_OPCODE_M GENMASK(4, 0)
|
||||
|
||||
-#define UD_SQ_WQE_DB_SL_L_S 5
|
||||
-#define UD_SQ_WQE_DB_SL_L_M GENMASK(6, 5)
|
||||
-
|
||||
-#define UD_SQ_WQE_DB_SL_H_S 13
|
||||
-#define UD_SQ_WQE_DB_SL_H_M GENMASK(14, 13)
|
||||
-
|
||||
-#define UD_SQ_WQE_INDEX_S 15
|
||||
-#define UD_SQ_WQE_INDEX_M GENMASK(30, 15)
|
||||
-
|
||||
#define UD_SQ_WQE_OWNER_S 7
|
||||
|
||||
#define UD_SQ_WQE_CQE_S 8
|
||||
|
||||
#define UD_SQ_WQE_SE_S 11
|
||||
|
||||
-#define UD_SQ_WQE_FLAG_S 31
|
||||
-
|
||||
#define UD_SQ_WQE_PD_S 0
|
||||
#define UD_SQ_WQE_PD_M GENMASK(23, 0)
|
||||
|
||||
@@ -376,4 +379,6 @@ struct hns_roce_ud_sq_wqe {
|
||||
|
||||
#define MAX_SERVICE_LEVEL 0x7
|
||||
|
||||
+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
|
||||
+
|
||||
#endif /* _HNS_ROCE_U_HW_V2_H */
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 125858d2..fc902815 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -1076,7 +1076,8 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
|
||||
|
||||
static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
|
||||
struct hns_roce_qp *qp,
|
||||
- struct hns_roce_context *ctx)
|
||||
+ struct hns_roce_context *ctx,
|
||||
+ uint64_t *dwqe_mmap_key)
|
||||
{
|
||||
struct hns_roce_create_qp_ex_resp resp_ex = {};
|
||||
struct hns_roce_create_qp_ex cmd_ex = {};
|
||||
@@ -1093,6 +1094,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
|
||||
&resp_ex.ibv_resp, sizeof(resp_ex));
|
||||
|
||||
qp->flags = resp_ex.drv_payload.cap_flags;
|
||||
+ *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1144,11 +1146,23 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
|
||||
+ uint64_t dwqe_mmap_key)
|
||||
+{
|
||||
+ qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE,
|
||||
+ MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key);
|
||||
+ if (qp->dwqe_page == MAP_FAILED)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||
struct ibv_qp_init_attr_ex *attr)
|
||||
{
|
||||
struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
|
||||
struct hns_roce_qp *qp;
|
||||
+ uint64_t dwqe_mmap_key;
|
||||
int ret;
|
||||
|
||||
ret = verify_qp_create_attr(context, attr);
|
||||
@@ -1167,7 +1181,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||
if (ret)
|
||||
goto err_buf;
|
||||
|
||||
- ret = qp_exec_create_cmd(attr, qp, context);
|
||||
+ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key);
|
||||
if (ret)
|
||||
goto err_cmd;
|
||||
|
||||
@@ -1175,10 +1189,18 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||
if (ret)
|
||||
goto err_store;
|
||||
|
||||
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
|
||||
+ ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key);
|
||||
+ if (ret)
|
||||
+ goto err_dwqe;
|
||||
+ }
|
||||
+
|
||||
qp_setup_config(attr, qp, context);
|
||||
|
||||
return &qp->verbs_qp.qp;
|
||||
|
||||
+err_dwqe:
|
||||
+ hns_roce_v2_clear_qp(context, qp);
|
||||
err_store:
|
||||
ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
|
||||
err_cmd:
|
||||
diff --git a/util/mmio.h b/util/mmio.h
|
||||
index 101af9dd..01d1455e 100644
|
||||
--- a/util/mmio.h
|
||||
+++ b/util/mmio.h
|
||||
@@ -210,8 +210,33 @@ static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
|
||||
{
|
||||
s390_mmio_write(dest, src, bytecnt);
|
||||
}
|
||||
-#else
|
||||
|
||||
+#elif defined(__aarch64__) || defined(__arm__)
|
||||
+#include <arm_neon.h>
|
||||
+
|
||||
+static inline void _mmio_memcpy_x64_64b(void *dest, const void *src)
|
||||
+{
|
||||
+ vst4q_u64(dest, vld4q_u64(src));
|
||||
+}
|
||||
+
|
||||
+static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
|
||||
+{
|
||||
+ do {
|
||||
+ _mmio_memcpy_x64_64b(dest, src);
|
||||
+ bytecnt -= sizeof(uint64x2x4_t);
|
||||
+ src += sizeof(uint64x2x4_t);
|
||||
+ } while (bytecnt > 0);
|
||||
+}
|
||||
+
|
||||
+#define mmio_memcpy_x64(dest, src, bytecount) \
|
||||
+ ({ \
|
||||
+ if (__builtin_constant_p((bytecount) == 64)) \
|
||||
+ _mmio_memcpy_x64_64b((dest), (src)); \
|
||||
+ else \
|
||||
+ _mmio_memcpy_x64((dest), (src), (bytecount)); \
|
||||
+ })
|
||||
+
|
||||
+#else
|
||||
/* Transfer is some multiple of 64 bytes */
|
||||
static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
From 608c142e7cbac2a6c02071022fe87b081a6ddc4f Mon Sep 17 00:00:00 2001
|
||||
From: Yixing Liu <liuyixing1@huawei.com>
|
||||
Date: Tue, 21 Dec 2021 21:38:08 +0800
|
||||
Subject: [PATCH 8/8] libhns: Use new SQ doorbell register for HIP09
|
||||
|
||||
HIP09 sets a new BAR space for the SQ doorbell. Each SQ doorbell has an
|
||||
independent BAR space and the size is 64KB. The SQ doorbell shares
|
||||
the same BAR space with direct WQE.
|
||||
|
||||
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 1 +
|
||||
providers/hns/hns_roce_u_hw_v2.c | 4 ++--
|
||||
providers/hns/hns_roce_u_verbs.c | 5 +++++
|
||||
3 files changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 37711363..460363b7 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -238,6 +238,7 @@ struct hns_roce_wq {
|
||||
unsigned int wqe_shift;
|
||||
unsigned int shift; /* wq size is 2^shift */
|
||||
int offset;
|
||||
+ void *db_reg;
|
||||
};
|
||||
|
||||
/* record the result of sge process */
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index f102fd61..9cbc0aac 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
|
||||
sq_db.parameter = htole32(qp->sq.head);
|
||||
roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
|
||||
- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
|
||||
+ hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_write512(uint64_t *dest, uint64_t *val)
|
||||
@@ -329,7 +329,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
|
||||
roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
|
||||
RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
|
||||
|
||||
- hns_roce_write512(qp->dwqe_page, wqe);
|
||||
+ hns_roce_write512(qp->sq.db_reg, wqe);
|
||||
}
|
||||
|
||||
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index fc902815..c5022c83 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -1117,6 +1117,11 @@ static void qp_setup_config(struct ibv_qp_init_attr_ex *attr,
|
||||
}
|
||||
|
||||
qp->max_inline_data = attr->cap.max_inline_data;
|
||||
+
|
||||
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
|
||||
+ qp->sq.db_reg = qp->dwqe_page;
|
||||
+ else
|
||||
+ qp->sq.db_reg = ctx->uar + ROCEE_VF_DB_CFG0_OFFSET;
|
||||
}
|
||||
|
||||
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
From 5cc1a047c4d71ced86b0f71f66adf12475a3c788 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:35 +0800
|
||||
Subject: libhns: Bugfix for checking whether the SRQ is full when posting WR
|
||||
|
||||
If the user posts a list of WRs, the head in the for loop is not updated in
|
||||
time, and the judgment of if (head == tail) becomes invalid.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 17 +++++++++++++----
|
||||
providers/hns/hns_roce_u_verbs.c | 2 +-
|
||||
2 files changed, 14 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 82124082..0c15bdbe 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1527,6 +1527,15 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
+ unsigned int cur;
|
||||
+
|
||||
+ cur = idx_que->head - idx_que->tail;
|
||||
+ return cur >= srq->wqe_cnt - 1;
|
||||
+}
|
||||
+
|
||||
static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
@@ -1577,14 +1586,14 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
|
||||
max_sge = srq->max_gs - srq->rsv_sge;
|
||||
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||
- if (wr->num_sge > max_sge) {
|
||||
- ret = -EINVAL;
|
||||
+ if (hns_roce_v2_srqwq_overflow(srq)) {
|
||||
+ ret = -ENOMEM;
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
|
||||
- if (srq->idx_que.head == srq->idx_que.tail) {
|
||||
- ret = -ENOMEM;
|
||||
+ if (wr->num_sge > max_sge) {
|
||||
+ ret = -EINVAL;
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 3abf7b48..dace35fd 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -492,7 +492,7 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
idx_que->bitmap[i] = ~(0UL);
|
||||
|
||||
idx_que->head = 0;
|
||||
- idx_que->tail = srq->wqe_cnt - 1;
|
||||
+ idx_que->tail = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
From a79800afbbc48e5c5274bf3fc0e890705b3a596d Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:36 +0800
|
||||
Subject: libhns: Allow users to create a 0-depth SRQs
|
||||
|
||||
Users are allowed to create 0-depth SRQs, so the judgement about whether
|
||||
max_wr is zero should be removed.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index dace35fd..2d1a6de3 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -525,7 +525,7 @@ static int hns_roce_verify_srq(struct hns_roce_context *context,
|
||||
init_attr->srq_type != IBV_SRQT_XRC)
|
||||
return -EINVAL;
|
||||
|
||||
- if (!init_attr->attr.max_wr || !init_attr->attr.max_sge ||
|
||||
+ if (!init_attr->attr.max_sge ||
|
||||
init_attr->attr.max_wr > context->max_srq_wr ||
|
||||
init_attr->attr.max_sge > context->max_srq_sge)
|
||||
return -EINVAL;
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,176 +0,0 @@
|
||||
From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:37 +0800
|
||||
Subject: libhns: Refactor the process of post_srq_recv
|
||||
|
||||
SRQ is a shared queue, it mainly consists of four parts:
|
||||
|
||||
1. wqe buf: wqe buf is used to store wqe data.
|
||||
|
||||
2. wqe_idx buf: the cqe of SRQ is not generated in the order of wqe, so
|
||||
the wqe_idx corresponding to the idle WQE needs to be placed in a FIFO
|
||||
queue, it can instruct the hardware to obtain the corresponding WQE.
|
||||
|
||||
3. bitmap: bitmap is used to generate and release wqe_idx. When the user
|
||||
has a new WR, the driver finds the idx of the idle wqe in bitmap. When the
|
||||
CQE of wqe is generated, the driver releases the idx.
|
||||
|
||||
4. wr_id buf: wr_id buf is used to store the user's wr_id, then return it
|
||||
to the user when ibv_poll_cq() is invoked.
|
||||
|
||||
After the refactor, the functions of the four parts are clearer.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++-------------
|
||||
1 file changed, 57 insertions(+), 38 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 0c15bdbe..b622eaef 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
|
||||
return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
|
||||
}
|
||||
|
||||
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
|
||||
+static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
|
||||
{
|
||||
return srq->buf.buf + (n << srq->wqe_shift);
|
||||
}
|
||||
@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
||||
return cur >= srq->wqe_cnt - 1;
|
||||
}
|
||||
|
||||
-static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
+static int check_post_srq_valid(struct hns_roce_srq *srq,
|
||||
+ struct ibv_recv_wr *wr)
|
||||
+{
|
||||
+ unsigned int max_sge = srq->max_gs - srq->rsv_sge;
|
||||
+
|
||||
+ if (hns_roce_v2_srqwq_overflow(srq))
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ if (wr->num_sge > max_sge)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
int bit_num;
|
||||
@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx,
|
||||
+ struct ibv_recv_wr *wr)
|
||||
+{
|
||||
+ struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
+ int i;
|
||||
+
|
||||
+ dseg = get_srq_wqe(srq, wqe_idx);
|
||||
+
|
||||
+ for (i = 0; i < wr->num_sge; ++i) {
|
||||
+ dseg[i].len = htole32(wr->sg_list[i].length);
|
||||
+ dseg[i].lkey = htole32(wr->sg_list[i].lkey);
|
||||
+ dseg[i].addr = htole64(wr->sg_list[i].addr);
|
||||
+ }
|
||||
+
|
||||
+ /* hw stop reading when identify the last one */
|
||||
+ if (srq->rsv_sge) {
|
||||
+ dseg[i].len = htole32(INVALID_SGE_LENGTH);
|
||||
+ dseg[i].lkey = htole32(0x0);
|
||||
+ dseg[i].addr = 0;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
|
||||
+{
|
||||
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
+ unsigned int head;
|
||||
+ __le32 *idx_buf;
|
||||
+
|
||||
+ head = idx_que->head & (srq->wqe_cnt - 1);
|
||||
+
|
||||
+ idx_buf = get_idx_buf(idx_que, head);
|
||||
+ *idx_buf = htole32(wqe_idx);
|
||||
+
|
||||
+ idx_que->head++;
|
||||
+}
|
||||
+
|
||||
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
struct ibv_recv_wr *wr,
|
||||
struct ibv_recv_wr **bad_wr)
|
||||
{
|
||||
struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context);
|
||||
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
|
||||
- struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
struct hns_roce_db srq_db;
|
||||
- unsigned int max_sge;
|
||||
- __le32 *srq_idx;
|
||||
+ unsigned int wqe_idx;
|
||||
int ret = 0;
|
||||
- int wqe_idx;
|
||||
- void *wqe;
|
||||
int nreq;
|
||||
- int ind;
|
||||
- int i;
|
||||
|
||||
pthread_spin_lock(&srq->lock);
|
||||
|
||||
- /* current idx of srqwq */
|
||||
- ind = srq->idx_que.head & (srq->wqe_cnt - 1);
|
||||
-
|
||||
- max_sge = srq->max_gs - srq->rsv_sge;
|
||||
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||
- if (hns_roce_v2_srqwq_overflow(srq)) {
|
||||
- ret = -ENOMEM;
|
||||
- *bad_wr = wr;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- if (wr->num_sge > max_sge) {
|
||||
- ret = -EINVAL;
|
||||
+ ret = check_post_srq_valid(srq, wr);
|
||||
+ if (ret) {
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
break;
|
||||
}
|
||||
|
||||
- wqe = get_srq_wqe(srq, wqe_idx);
|
||||
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
|
||||
-
|
||||
- for (i = 0; i < wr->num_sge; ++i) {
|
||||
- set_data_seg_v2(dseg, wr->sg_list + i);
|
||||
- dseg++;
|
||||
- }
|
||||
-
|
||||
- /* hw stop reading when identify the last one */
|
||||
- if (srq->rsv_sge)
|
||||
- set_ending_data_seg(dseg);
|
||||
-
|
||||
- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind);
|
||||
- *srq_idx = htole32(wqe_idx);
|
||||
+ fill_srq_wqe(srq, wqe_idx, wr);
|
||||
+ fill_wqe_idx(srq, wqe_idx);
|
||||
|
||||
srq->wrid[wqe_idx] = wr->wr_id;
|
||||
- ind = (ind + 1) & (srq->wqe_cnt - 1);
|
||||
}
|
||||
|
||||
if (nreq) {
|
||||
- srq->idx_que.head += nreq;
|
||||
-
|
||||
/*
|
||||
* Make sure that descriptors are written before
|
||||
* we write doorbell record.
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From a18b0ee409d3382aa556b8f06a6cd6bfbef3f5c8 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:38 +0800
|
||||
Subject: libhns: Set srqlimit to 0 when creating SRQ
|
||||
|
||||
According to the IB specification, the srq_limit parameter should not be
|
||||
configured when creating srq. But the libhns does not set attr.srq_limit
|
||||
to 0 currently. As a result, when attr.srq_limit provided by the user is
|
||||
not 0, the value of attr.srq_limit returned to the user will be different
|
||||
from that obtained by ibv_query_srq(). Therefore, the driver should set
|
||||
attr.srq_limit to 0 when creating SRQ.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 2d1a6de3..107da753 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -580,6 +580,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
|
||||
srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
|
||||
srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
|
||||
attr->max_sge = srq->max_gs;
|
||||
+ attr->srq_limit = 0;
|
||||
|
||||
ret = hns_roce_create_idx_que(srq);
|
||||
if (ret)
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,367 +0,0 @@
|
||||
From b914c76318f5b95e3157c3cbf1ccb49ec6d27635 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:39 +0800
|
||||
Subject: libhns: Refactor the process of create_srq
|
||||
|
||||
Reorganize create_srq() as several sub-functions to make the process
|
||||
clearer.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 7 +-
|
||||
providers/hns/hns_roce_u_hw_v2.c | 2 +-
|
||||
providers/hns/hns_roce_u_verbs.c | 178 ++++++++++++++++++-------------
|
||||
3 files changed, 105 insertions(+), 82 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index b3f48113..a437727c 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -211,7 +211,8 @@ struct hns_roce_idx_que {
|
||||
|
||||
struct hns_roce_srq {
|
||||
struct verbs_srq verbs_srq;
|
||||
- struct hns_roce_buf buf;
|
||||
+ struct hns_roce_idx_que idx_que;
|
||||
+ struct hns_roce_buf wqe_buf;
|
||||
pthread_spinlock_t lock;
|
||||
unsigned long *wrid;
|
||||
unsigned int srqn;
|
||||
@@ -221,7 +222,6 @@ struct hns_roce_srq {
|
||||
unsigned int wqe_shift;
|
||||
unsigned int *db;
|
||||
unsigned short counter;
|
||||
- struct hns_roce_idx_que idx_que;
|
||||
};
|
||||
|
||||
struct hns_roce_wq {
|
||||
@@ -343,8 +343,7 @@ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
|
||||
|
||||
static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq)
|
||||
{
|
||||
- return container_of(container_of(ibv_srq, struct verbs_srq, srq),
|
||||
- struct hns_roce_srq, verbs_srq);
|
||||
+ return container_of(ibv_srq, struct hns_roce_srq, verbs_srq.srq);
|
||||
}
|
||||
|
||||
static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp)
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index b622eaef..d4e7e4f9 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -244,7 +244,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
|
||||
|
||||
static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
|
||||
{
|
||||
- return srq->buf.buf + (n << srq->wqe_shift);
|
||||
+ return srq->wqe_buf.buf + (n << srq->wqe_shift);
|
||||
}
|
||||
|
||||
static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 107da753..75b9e530 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -432,17 +432,23 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx,
|
||||
uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >>
|
||||
ctx->srq_table_shift;
|
||||
|
||||
+ pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
+
|
||||
if (!ctx->srq_table[tind].refcnt) {
|
||||
ctx->srq_table[tind].table =
|
||||
calloc(ctx->srq_table_mask + 1,
|
||||
sizeof(struct hns_roce_srq *));
|
||||
- if (!ctx->srq_table[tind].table)
|
||||
+ if (!ctx->srq_table[tind].table) {
|
||||
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
return -ENOMEM;
|
||||
+ }
|
||||
}
|
||||
|
||||
++ctx->srq_table[tind].refcnt;
|
||||
ctx->srq_table[tind].table[srq->srqn & ctx->srq_table_mask] = srq;
|
||||
|
||||
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -461,13 +467,46 @@ static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn)
|
||||
{
|
||||
uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
|
||||
|
||||
+ pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
+
|
||||
if (!--ctx->srq_table[tind].refcnt)
|
||||
free(ctx->srq_table[tind].table);
|
||||
else
|
||||
ctx->srq_table[tind].table[srqn & ctx->srq_table_mask] = NULL;
|
||||
+
|
||||
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
+}
|
||||
+
|
||||
+static int verify_srq_create_attr(struct hns_roce_context *context,
|
||||
+ struct ibv_srq_init_attr_ex *attr)
|
||||
+{
|
||||
+ if (attr->srq_type != IBV_SRQT_BASIC &&
|
||||
+ attr->srq_type != IBV_SRQT_XRC)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!attr->attr.max_sge ||
|
||||
+ attr->attr.max_wr > context->max_srq_wr ||
|
||||
+ attr->attr.max_sge > context->max_srq_sge)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
|
||||
+ struct ibv_srq_init_attr_ex *attr)
|
||||
+{
|
||||
+ if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
|
||||
+ srq->rsv_sge = 1;
|
||||
+
|
||||
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
|
||||
+ srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
|
||||
+ srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
|
||||
+ srq->max_gs));
|
||||
+ attr->attr.max_sge = srq->max_gs;
|
||||
+ attr->attr.srq_limit = 0;
|
||||
}
|
||||
|
||||
-static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
+static int alloc_srq_idx_que(struct hns_roce_srq *srq)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
unsigned int buf_size;
|
||||
@@ -478,13 +517,13 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
BIT_CNT_PER_LONG;
|
||||
idx_que->bitmap = calloc(idx_que->bitmap_cnt, sizeof(unsigned long));
|
||||
if (!idx_que->bitmap)
|
||||
- return ENOMEM;
|
||||
+ return -ENOMEM;
|
||||
|
||||
buf_size = to_hr_hem_entries_size(srq->wqe_cnt, idx_que->entry_shift);
|
||||
if (hns_roce_alloc_buf(&idx_que->buf, buf_size, HNS_HW_PAGE_SIZE)) {
|
||||
free(idx_que->bitmap);
|
||||
idx_que->bitmap = NULL;
|
||||
- return ENOMEM;
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
/* init the idx_que bitmap */
|
||||
@@ -497,40 +536,48 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq)
|
||||
+static int alloc_srq_wqe_buf(struct hns_roce_srq *srq)
|
||||
{
|
||||
- int srq_buf_size;
|
||||
+ int buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
|
||||
|
||||
- srq->wrid = calloc(srq->wqe_cnt, sizeof(unsigned long));
|
||||
- if (!srq->wrid)
|
||||
- return ENOMEM;
|
||||
+ return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE);
|
||||
+}
|
||||
|
||||
- srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
|
||||
- srq->max_gs));
|
||||
- srq_buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
|
||||
+static int alloc_srq_buf(struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ int ret;
|
||||
|
||||
- /* allocate srq wqe buf */
|
||||
- if (hns_roce_alloc_buf(&srq->buf, srq_buf_size, HNS_HW_PAGE_SIZE)) {
|
||||
- free(srq->wrid);
|
||||
- return ENOMEM;
|
||||
+ ret = alloc_srq_idx_que(srq);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ ret = alloc_srq_wqe_buf(srq);
|
||||
+ if (ret)
|
||||
+ goto err_idx_que;
|
||||
+
|
||||
+ srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid));
|
||||
+ if (!srq->wrid) {
|
||||
+ ret = -ENOMEM;
|
||||
+ goto err_wqe_buf;
|
||||
}
|
||||
|
||||
return 0;
|
||||
-}
|
||||
|
||||
-static int hns_roce_verify_srq(struct hns_roce_context *context,
|
||||
- struct ibv_srq_init_attr_ex *init_attr)
|
||||
-{
|
||||
- if (init_attr->srq_type != IBV_SRQT_BASIC &&
|
||||
- init_attr->srq_type != IBV_SRQT_XRC)
|
||||
- return -EINVAL;
|
||||
+err_wqe_buf:
|
||||
+ hns_roce_free_buf(&srq->wqe_buf);
|
||||
+err_idx_que:
|
||||
+ hns_roce_free_buf(&srq->idx_que.buf);
|
||||
+ free(srq->idx_que.bitmap);
|
||||
|
||||
- if (!init_attr->attr.max_sge ||
|
||||
- init_attr->attr.max_wr > context->max_srq_wr ||
|
||||
- init_attr->attr.max_sge > context->max_srq_sge)
|
||||
- return -EINVAL;
|
||||
+ return ret;
|
||||
+}
|
||||
|
||||
- return 0;
|
||||
+static void free_srq_buf(struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ free(srq->wrid);
|
||||
+ hns_roce_free_buf(&srq->wqe_buf);
|
||||
+ hns_roce_free_buf(&srq->idx_que.buf);
|
||||
+ free(srq->idx_que.bitmap);
|
||||
}
|
||||
|
||||
static int exec_srq_create_cmd(struct ibv_context *context,
|
||||
@@ -541,7 +588,7 @@ static int exec_srq_create_cmd(struct ibv_context *context,
|
||||
struct hns_roce_create_srq_ex cmd_ex = {};
|
||||
int ret;
|
||||
|
||||
- cmd_ex.buf_addr = (uintptr_t)srq->buf.buf;
|
||||
+ cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf;
|
||||
cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf;
|
||||
cmd_ex.db_addr = (uintptr_t)srq->db;
|
||||
|
||||
@@ -559,57 +606,44 @@ static int exec_srq_create_cmd(struct ibv_context *context,
|
||||
static struct ibv_srq *create_srq(struct ibv_context *context,
|
||||
struct ibv_srq_init_attr_ex *init_attr)
|
||||
{
|
||||
- struct hns_roce_context *ctx = to_hr_ctx(context);
|
||||
- struct ibv_srq_attr *attr = &init_attr->attr;
|
||||
+ struct hns_roce_context *hr_ctx = to_hr_ctx(context);
|
||||
struct hns_roce_srq *srq;
|
||||
int ret;
|
||||
|
||||
- if (hns_roce_verify_srq(ctx, init_attr))
|
||||
- return NULL;
|
||||
+ ret = verify_srq_create_attr(hr_ctx, init_attr);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
|
||||
srq = calloc(1, sizeof(*srq));
|
||||
- if (!srq)
|
||||
- return NULL;
|
||||
+ if (!srq) {
|
||||
+ ret = -ENOMEM;
|
||||
+ goto err;
|
||||
+ }
|
||||
|
||||
if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
|
||||
goto err_free_srq;
|
||||
|
||||
- if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
|
||||
- srq->rsv_sge = 1;
|
||||
-
|
||||
- srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
|
||||
- srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
|
||||
- attr->max_sge = srq->max_gs;
|
||||
- attr->srq_limit = 0;
|
||||
-
|
||||
- ret = hns_roce_create_idx_que(srq);
|
||||
- if (ret)
|
||||
+ set_srq_param(context, srq, init_attr);
|
||||
+ if (alloc_srq_buf(srq))
|
||||
goto err_free_srq;
|
||||
|
||||
- ret = hns_roce_alloc_srq_buf(srq);
|
||||
- if (ret)
|
||||
- goto err_idx_que;
|
||||
-
|
||||
- srq->db = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
|
||||
+ srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB);
|
||||
if (!srq->db)
|
||||
goto err_srq_buf;
|
||||
|
||||
- *(srq->db) = 0;
|
||||
-
|
||||
- pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
+ *srq->db = 0;
|
||||
|
||||
ret = exec_srq_create_cmd(context, srq, init_attr);
|
||||
if (ret)
|
||||
goto err_srq_db;
|
||||
|
||||
- ret = hns_roce_store_srq(ctx, srq);
|
||||
+ ret = hns_roce_store_srq(hr_ctx, srq);
|
||||
if (ret)
|
||||
goto err_destroy_srq;
|
||||
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
-
|
||||
- srq->max_gs = attr->max_sge;
|
||||
- attr->max_sge = min(attr->max_sge - srq->rsv_sge, ctx->max_srq_sge);
|
||||
+ srq->max_gs = init_attr->attr.max_sge;
|
||||
+ init_attr->attr.max_sge =
|
||||
+ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
|
||||
|
||||
return &srq->verbs_srq.srq;
|
||||
|
||||
@@ -617,20 +651,19 @@ err_destroy_srq:
|
||||
ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
|
||||
|
||||
err_srq_db:
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
|
||||
+ hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
|
||||
|
||||
err_srq_buf:
|
||||
- free(srq->wrid);
|
||||
- hns_roce_free_buf(&srq->buf);
|
||||
-
|
||||
-err_idx_que:
|
||||
- free(srq->idx_que.bitmap);
|
||||
- hns_roce_free_buf(&srq->idx_que.buf);
|
||||
+ free_srq_buf(srq);
|
||||
|
||||
err_free_srq:
|
||||
free(srq);
|
||||
|
||||
+err:
|
||||
+ if (ret < 0)
|
||||
+ ret = -ret;
|
||||
+
|
||||
+ errno = ret;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -690,23 +723,14 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
|
||||
struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
|
||||
int ret;
|
||||
|
||||
- pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
-
|
||||
ret = ibv_cmd_destroy_srq(ibv_srq);
|
||||
- if (ret) {
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
+ if (ret)
|
||||
return ret;
|
||||
- }
|
||||
|
||||
hns_roce_clear_srq(ctx, srq->srqn);
|
||||
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
-
|
||||
hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
|
||||
- hns_roce_free_buf(&srq->buf);
|
||||
- free(srq->wrid);
|
||||
- hns_roce_free_buf(&srq->idx_que.buf);
|
||||
- free(srq->idx_que.bitmap);
|
||||
+ free_srq_buf(srq);
|
||||
free(srq);
|
||||
|
||||
return 0;
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,69 +0,0 @@
|
||||
From d68ac72a8e4f2cf9754d3fcbbb8ff2a03e514c2f Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:40 +0800
|
||||
Subject: libhns: Remove the reserved wqe of SRQ
|
||||
|
||||
There is an unreasonable reserved WQE in SRQ, it should be removed.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 1 +
|
||||
providers/hns/hns_roce_u_hw_v2.c | 4 +---
|
||||
providers/hns/hns_roce_u_verbs.c | 5 ++++-
|
||||
3 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index a437727c..0d7abd81 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -64,6 +64,7 @@
|
||||
#define HNS_ROCE_MIN_CQE_NUM 0x40
|
||||
#define HNS_ROCE_V1_MIN_WQE_NUM 0x20
|
||||
#define HNS_ROCE_V2_MIN_WQE_NUM 0x40
|
||||
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
|
||||
|
||||
#define HNS_ROCE_CQE_SIZE 0x20
|
||||
#define HNS_ROCE_V3_CQE_SIZE 0x40
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index d4e7e4f9..2fb6cdaf 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1530,10 +1530,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
- unsigned int cur;
|
||||
|
||||
- cur = idx_que->head - idx_que->tail;
|
||||
- return cur >= srq->wqe_cnt - 1;
|
||||
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
|
||||
}
|
||||
|
||||
static int check_post_srq_valid(struct hns_roce_srq *srq,
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 75b9e530..4847639b 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -489,6 +489,9 @@ static int verify_srq_create_attr(struct hns_roce_context *context,
|
||||
attr->attr.max_sge > context->max_srq_sge)
|
||||
return -EINVAL;
|
||||
|
||||
+ attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr,
|
||||
+ HNS_ROCE_MIN_SRQ_WQE_NUM);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -498,7 +501,7 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
|
||||
if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
|
||||
srq->rsv_sge = 1;
|
||||
|
||||
- srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
|
||||
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr);
|
||||
srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
|
||||
srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
|
||||
srq->max_gs));
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,89 +0,0 @@
|
||||
From 11c81d0e3a987f95b74e03b5e592a45029302f1d Mon Sep 17 00:00:00 2001
|
||||
From: Weihang Li <liweihang@huawei.com>
|
||||
Date: Fri, 14 May 2021 10:02:56 +0800
|
||||
Subject: libhns: Refactor process of setting extended sge
|
||||
|
||||
Refactor and encapsulate the parts of getting number of extended sge a WQE
|
||||
can use to make it easier to understand.
|
||||
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++------------
|
||||
1 file changed, 29 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 30ab072a..a8508fc5 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -920,31 +920,44 @@ err_alloc:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
-static void set_extend_sge_param(struct hns_roce_device *hr_dev,
|
||||
- struct ibv_qp_init_attr_ex *attr,
|
||||
- struct hns_roce_qp *qp, unsigned int wr_cnt)
|
||||
+static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
|
||||
{
|
||||
- int cnt = 0;
|
||||
+ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
|
||||
+ return qp->sq.max_gs;
|
||||
+
|
||||
+ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
|
||||
+ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void set_ext_sge_param(struct hns_roce_device *hr_dev,
|
||||
+ struct ibv_qp_init_attr_ex *attr,
|
||||
+ struct hns_roce_qp *qp, unsigned int wr_cnt)
|
||||
+{
|
||||
+ unsigned int total_sge_cnt;
|
||||
+ unsigned int wqe_sge_cnt;
|
||||
+
|
||||
+ qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
|
||||
|
||||
if (hr_dev->hw_version == HNS_ROCE_HW_VER1) {
|
||||
qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE;
|
||||
- } else {
|
||||
- qp->sq.max_gs = attr->cap.max_send_sge;
|
||||
- if (attr->qp_type == IBV_QPT_UD)
|
||||
- cnt = roundup_pow_of_two(wr_cnt * qp->sq.max_gs);
|
||||
- else if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
|
||||
- cnt = roundup_pow_of_two(wr_cnt *
|
||||
- (qp->sq.max_gs -
|
||||
- HNS_ROCE_SGE_IN_WQE));
|
||||
+ return;
|
||||
}
|
||||
|
||||
- qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
|
||||
+ qp->sq.max_gs = attr->cap.max_send_sge;
|
||||
+
|
||||
+ wqe_sge_cnt = get_wqe_ext_sge_cnt(qp);
|
||||
|
||||
/* If the number of extended sge is not zero, they MUST use the
|
||||
* space of HNS_HW_PAGE_SIZE at least.
|
||||
*/
|
||||
- qp->ex_sge.sge_cnt = cnt ?
|
||||
- max(cnt, HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0;
|
||||
+ if (wqe_sge_cnt) {
|
||||
+ total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt);
|
||||
+ qp->ex_sge.sge_cnt =
|
||||
+ max(total_sge_cnt,
|
||||
+ (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
|
||||
@@ -988,7 +1001,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
|
||||
qp->sq.wqe_cnt = cnt;
|
||||
qp->sq.shift = hr_ilog32(cnt);
|
||||
|
||||
- set_extend_sge_param(hr_dev, attr, qp, cnt);
|
||||
+ set_ext_sge_param(hr_dev, attr, qp, cnt);
|
||||
|
||||
qp->sq.max_post = min(ctx->max_qp_wr, cnt);
|
||||
qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs);
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,139 +0,0 @@
|
||||
From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Fri, 14 May 2021 10:02:57 +0800
|
||||
Subject: libhns: Optimize set_sge process
|
||||
|
||||
Use local variables to avoid frequent ldr/str operations. And because UD's
|
||||
process of setting sge is simpler than RC's, set_sge() can be split
|
||||
into two functions for compiler optimization.
|
||||
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++---------
|
||||
1 file changed, 61 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 4988943a..dc79a6f8 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
- struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
- struct hns_roce_sge_info *sge_info)
|
||||
+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
+ struct hns_roce_sge_info *sge_info)
|
||||
{
|
||||
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
|
||||
+ uint32_t index = sge_info->start_idx;
|
||||
+ struct ibv_sge *sge = wr->sg_list;
|
||||
+ uint32_t len = 0;
|
||||
+ uint32_t cnt = 0;
|
||||
+ int flag;
|
||||
int i;
|
||||
|
||||
- sge_info->valid_num = 0;
|
||||
- sge_info->total_len = 0;
|
||||
+ flag = (wr->send_flags & IBV_SEND_INLINE &&
|
||||
+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
|
||||
+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
|
||||
|
||||
- for (i = 0; i < wr->num_sge; i++) {
|
||||
- if (unlikely(!wr->sg_list[i].length))
|
||||
+ for (i = 0; i < wr->num_sge; i++, sge++) {
|
||||
+ if (unlikely(!sge->length))
|
||||
continue;
|
||||
|
||||
- sge_info->total_len += wr->sg_list[i].length;
|
||||
- sge_info->valid_num++;
|
||||
+ len += sge->length;
|
||||
+ cnt++;
|
||||
|
||||
- if (wr->send_flags & IBV_SEND_INLINE &&
|
||||
- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
|
||||
- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP)
|
||||
+ if (flag)
|
||||
continue;
|
||||
|
||||
- /* No inner sge in UD wqe */
|
||||
- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE &&
|
||||
- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) {
|
||||
- set_data_seg_v2(dseg, wr->sg_list + i);
|
||||
+ if (cnt <= HNS_ROCE_SGE_IN_WQE) {
|
||||
+ set_data_seg_v2(dseg, sge);
|
||||
dseg++;
|
||||
} else {
|
||||
- dseg = get_send_sge_ex(qp, sge_info->start_idx &
|
||||
- (qp->ex_sge.sge_cnt - 1));
|
||||
- set_data_seg_v2(dseg, wr->sg_list + i);
|
||||
- sge_info->start_idx++;
|
||||
+ dseg = get_send_sge_ex(qp, index & mask);
|
||||
+ set_data_seg_v2(dseg, sge);
|
||||
+ index++;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ sge_info->start_idx = index;
|
||||
+ sge_info->valid_num = cnt;
|
||||
+ sge_info->total_len = len;
|
||||
+}
|
||||
+
|
||||
+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
+ struct hns_roce_sge_info *sge_info)
|
||||
+{
|
||||
+ int flag = wr->send_flags & IBV_SEND_INLINE;
|
||||
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
|
||||
+ uint32_t index = sge_info->start_idx;
|
||||
+ struct ibv_sge *sge = wr->sg_list;
|
||||
+ uint32_t len = 0;
|
||||
+ uint32_t cnt = 0;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < wr->num_sge; i++, sge++) {
|
||||
+ if (unlikely(!sge->length))
|
||||
+ continue;
|
||||
+
|
||||
+ len += sge->length;
|
||||
+ cnt++;
|
||||
+
|
||||
+ if (flag)
|
||||
+ continue;
|
||||
+
|
||||
+ /* No inner sge in UD wqe */
|
||||
+ dseg = get_send_sge_ex(qp, index & mask);
|
||||
+ set_data_seg_v2(dseg, sge);
|
||||
+ index++;
|
||||
+ }
|
||||
+
|
||||
+ sge_info->start_idx = index;
|
||||
+ sge_info->valid_num = cnt;
|
||||
+ sge_info->total_len = len;
|
||||
}
|
||||
|
||||
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
|
||||
@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
||||
UD_SQ_WQE_MSG_START_SGE_IDX_S,
|
||||
sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
|
||||
|
||||
- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
|
||||
+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
|
||||
|
||||
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
|
||||
|
||||
@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
wqe += sizeof(struct hns_roce_rc_sq_wqe);
|
||||
dseg = wqe;
|
||||
|
||||
- set_sge(dseg, qp, wr, sge_info);
|
||||
+ set_rc_sge(dseg, qp, wr, sge_info);
|
||||
|
||||
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
|
||||
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,258 +0,0 @@
|
||||
From 1ea1524950b8bc4e4dfe06865e1e5c47a657b6e4 Mon Sep 17 00:00:00 2001
|
||||
From: Gal Pressman <galpress@amazon.com>
|
||||
Date: Sun, 6 Jun 2021 14:48:07 +0300
|
||||
Subject: verbs: Add generic logging API
|
||||
|
||||
A debug prints mechanism is useful when debugging application failures.
|
||||
This patch adds a generic API that can be used by all providers and
|
||||
replace provider-specific counterparts.
|
||||
|
||||
The debug messages are controlled through an environment variable named
|
||||
VERBS_LOG_LEVEL, where the value indicates which prints should be
|
||||
enabled:
|
||||
|
||||
enum {
|
||||
VERBS_LOG_LEVEL_NONE,
|
||||
VERBS_LOG_ERR,
|
||||
VERBS_LOG_WARN,
|
||||
VERBS_LOG_INFO,
|
||||
VERBS_LOG_DEBUG,
|
||||
};
|
||||
|
||||
For example, to enable prints with level warn or higher, VERBS_LOG_LEVEL
|
||||
shall be set to 2.
|
||||
|
||||
The output shall be written to the file provided in the VERBS_LOG_FILE
|
||||
environment variable. When the library is compiled in debug mode and no
|
||||
file is provided the output shall be written to stderr.
|
||||
|
||||
For data-path flows, where the overhead of the additional if statement
|
||||
matters, the verbs_*_datapath() macros can be used, which will be
|
||||
compiled out when the library is compiled for release.
|
||||
|
||||
Signed-off-by: Gal Pressman <galpress@amazon.com>
|
||||
---
|
||||
Documentation/libibverbs.md | 18 ++++++++++
|
||||
buildlib/RDMA_BuildType.cmake | 4 +++
|
||||
libibverbs/driver.h | 50 +++++++++++++++++++++++++++
|
||||
libibverbs/init.c | 65 +++++++++++++++++++++++++++++++++++
|
||||
libibverbs/libibverbs.map.in | 1 +
|
||||
5 files changed, 138 insertions(+)
|
||||
|
||||
diff --git a/Documentation/libibverbs.md b/Documentation/libibverbs.md
|
||||
index cbe076e..980f354 100644
|
||||
--- a/Documentation/libibverbs.md
|
||||
+++ b/Documentation/libibverbs.md
|
||||
@@ -56,3 +56,21 @@ need to increase this limit. This is usually done for ordinary users
|
||||
via the file /etc/security/limits.conf. More configuration may be
|
||||
necessary if you are logging in via OpenSSH and your sshd is
|
||||
configured to use privilege separation.
|
||||
+
|
||||
+# Debugging
|
||||
+
|
||||
+### Enabling debug prints
|
||||
+
|
||||
+Library and providers debug prints can be enabled using the `VERBS_LOG_LEVEL`
|
||||
+environment variable, the output shall be written to the file provided in the
|
||||
+`VERBS_LOG_FILE` environment variable. When the library is compiled in debug
|
||||
+mode and no file is provided the output will be written to stderr.
|
||||
+
|
||||
+Note: some of the debug prints are only available when the library is compiled
|
||||
+in debug mode.
|
||||
+
|
||||
+The following table describes the expected behavior when VERBS_LOG_LEVEL is set:
|
||||
+| | Release | Debug |
|
||||
+|-----------------|---------------------------------|------------------------------------------------|
|
||||
+| Regular prints | Output to VERBS_LOG_FILE if set | Output to VERBS_LOG_FILE, or stderr if not set |
|
||||
+| Datapath prints | Compiled out, no output | Output to VERBS_LOG_FILE, or stderr if not set |
|
||||
diff --git a/buildlib/RDMA_BuildType.cmake b/buildlib/RDMA_BuildType.cmake
|
||||
index 17206f5..7a4f6a4 100644
|
||||
--- a/buildlib/RDMA_BuildType.cmake
|
||||
+++ b/buildlib/RDMA_BuildType.cmake
|
||||
@@ -39,4 +39,8 @@ function(RDMA_BuildType)
|
||||
CACHE STRING "Default flags for RelWithDebInfo configuration" FORCE)
|
||||
endif()
|
||||
endforeach()
|
||||
+
|
||||
+ if (CMAKE_BUILD_TYPE STREQUAL Debug OR CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo)
|
||||
+ add_definitions("-DVERBS_DEBUG")
|
||||
+ endif()
|
||||
endfunction()
|
||||
diff --git a/libibverbs/driver.h b/libibverbs/driver.h
|
||||
index 926023b..bdb1aa4 100644
|
||||
--- a/libibverbs/driver.h
|
||||
+++ b/libibverbs/driver.h
|
||||
@@ -49,6 +49,56 @@
|
||||
|
||||
struct verbs_device;
|
||||
|
||||
+enum {
|
||||
+ VERBS_LOG_LEVEL_NONE,
|
||||
+ VERBS_LOG_ERR,
|
||||
+ VERBS_LOG_WARN,
|
||||
+ VERBS_LOG_INFO,
|
||||
+ VERBS_LOG_DEBUG,
|
||||
+};
|
||||
+
|
||||
+void __verbs_log(struct verbs_context *ctx, uint32_t level,
|
||||
+ const char *fmt, ...);
|
||||
+
|
||||
+#define verbs_log(ctx, level, format, arg...) \
|
||||
+do { \
|
||||
+ int tmp = errno; \
|
||||
+ __verbs_log(ctx, level, "%s: %s:%d: " format, \
|
||||
+ (ctx)->context.device->name, __func__, __LINE__, ##arg); \
|
||||
+ errno = tmp; \
|
||||
+} while (0)
|
||||
+
|
||||
+#define verbs_debug(ctx, format, arg...) \
|
||||
+ verbs_log(ctx, VERBS_LOG_DEBUG, format, ##arg)
|
||||
+
|
||||
+#define verbs_info(ctx, format, arg...) \
|
||||
+ verbs_log(ctx, VERBS_LOG_INFO, format, ##arg)
|
||||
+
|
||||
+#define verbs_warn(ctx, format, arg...) \
|
||||
+ verbs_log(ctx, VERBS_LOG_WARN, format, ##arg)
|
||||
+
|
||||
+#define verbs_err(ctx, format, arg...) \
|
||||
+ verbs_log(ctx, VERBS_LOG_ERR, format, ##arg)
|
||||
+
|
||||
+#ifdef VERBS_DEBUG
|
||||
+#define verbs_log_datapath(ctx, level, format, arg...) \
|
||||
+ verbs_log(ctx, level, format, ##arg)
|
||||
+#else
|
||||
+#define verbs_log_datapath(ctx, level, format, arg...) {}
|
||||
+#endif
|
||||
+
|
||||
+#define verbs_debug_datapath(ctx, format, arg...) \
|
||||
+ verbs_log_datapath(ctx, VERBS_LOG_DEBUG, format, ##arg)
|
||||
+
|
||||
+#define verbs_info_datapath(ctx, format, arg...) \
|
||||
+ verbs_log_datapath(ctx, VERBS_LOG_INFO, format, ##arg)
|
||||
+
|
||||
+#define verbs_warn_datapath(ctx, format, arg...) \
|
||||
+ verbs_log_datapath(ctx, VERBS_LOG_WARN, format, ##arg)
|
||||
+
|
||||
+#define verbs_err_datapath(ctx, format, arg...) \
|
||||
+ verbs_log_datapath(ctx, VERBS_LOG_ERR, format, ##arg)
|
||||
+
|
||||
enum verbs_xrcd_mask {
|
||||
VERBS_XRCD_HANDLE = 1 << 0,
|
||||
VERBS_XRCD_RESERVED = 1 << 1
|
||||
diff --git a/libibverbs/init.c b/libibverbs/init.c
|
||||
index f5340ea..52b166a 100644
|
||||
--- a/libibverbs/init.c
|
||||
+++ b/libibverbs/init.c
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <glob.h>
|
||||
+#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
@@ -52,11 +53,30 @@
|
||||
#include <rdma/rdma_netlink.h>
|
||||
|
||||
#include <util/util.h>
|
||||
+#include "driver.h"
|
||||
#include "ibverbs.h"
|
||||
#include <infiniband/cmd_write.h>
|
||||
|
||||
int abi_ver;
|
||||
|
||||
+static uint32_t verbs_log_level;
|
||||
+static FILE *verbs_log_fp;
|
||||
+
|
||||
+__attribute__((format(printf, 3, 4)))
|
||||
+void __verbs_log(struct verbs_context *ctx, uint32_t level,
|
||||
+ const char *fmt, ...)
|
||||
+{
|
||||
+ va_list args;
|
||||
+
|
||||
+ if (level <= verbs_log_level) {
|
||||
+ int tmp = errno;
|
||||
+ va_start(args, fmt);
|
||||
+ vfprintf(verbs_log_fp, fmt, args);
|
||||
+ va_end(args);
|
||||
+ errno = tmp;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
struct ibv_driver {
|
||||
struct list_node entry;
|
||||
const struct verbs_device_ops *ops;
|
||||
@@ -600,6 +620,49 @@ out:
|
||||
return num_devices;
|
||||
}
|
||||
|
||||
+static void verbs_set_log_level(void)
|
||||
+{
|
||||
+ char *env;
|
||||
+
|
||||
+ env = getenv("VERBS_LOG_LEVEL");
|
||||
+ if (env)
|
||||
+ verbs_log_level = strtol(env, NULL, 0);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Fallback in case log file is not provided or can't be opened.
|
||||
+ * Release mode: disable debug prints.
|
||||
+ * Debug mode: Use stderr instead of a file.
|
||||
+ */
|
||||
+static void verbs_log_file_fallback(void)
|
||||
+{
|
||||
+#ifdef VERBS_DEBUG
|
||||
+ verbs_log_fp = stderr;
|
||||
+#else
|
||||
+ verbs_log_level = VERBS_LOG_LEVEL_NONE;
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static void verbs_set_log_file(void)
|
||||
+{
|
||||
+ char *env;
|
||||
+
|
||||
+ if (verbs_log_level == VERBS_LOG_LEVEL_NONE)
|
||||
+ return;
|
||||
+
|
||||
+ env = getenv("VERBS_LOG_FILE");
|
||||
+ if (!env) {
|
||||
+ verbs_log_file_fallback();
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ verbs_log_fp = fopen(env, "aw+");
|
||||
+ if (!verbs_log_fp) {
|
||||
+ verbs_log_file_fallback();
|
||||
+ return;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
int ibverbs_init(void)
|
||||
{
|
||||
char *env_value;
|
||||
@@ -621,6 +684,8 @@ int ibverbs_init(void)
|
||||
return -errno;
|
||||
|
||||
check_memlock_limit();
|
||||
+ verbs_set_log_level();
|
||||
+ verbs_set_log_file();
|
||||
|
||||
return 0;
|
||||
}
|
||||
diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in
|
||||
index 7c0fb6a..905f58f 100644
|
||||
--- a/libibverbs/libibverbs.map.in
|
||||
+++ b/libibverbs/libibverbs.map.in
|
||||
@@ -167,6 +167,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ {
|
||||
global:
|
||||
/* These historical symbols are now private to libibverbs */
|
||||
__ioctl_final_num_attrs;
|
||||
+ __verbs_log;
|
||||
_verbs_init_and_alloc_context;
|
||||
execute_ioctl;
|
||||
ibv_cmd_advise_mr;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,164 +0,0 @@
|
||||
From 7c9a7a5848d19b792d1b108da55fa48611142a9b Mon Sep 17 00:00:00 2001
|
||||
From: Gal Pressman <galpress@amazon.com>
|
||||
Date: Tue, 29 Jun 2021 10:43:29 +0300
|
||||
Subject: libhns: Use the verbs logging API instead of printf/fprintf
|
||||
|
||||
Use the generic verbs logging API instead of calling printf/fprintf
|
||||
directly.
|
||||
This means that by default the prints will no longer be seen, but can be
|
||||
enabled by setting VERBS_LOG_LEVEL appropriately.
|
||||
|
||||
Signed-off-by: Gal Pressman <galpress@amazon.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v1.c | 34 +++++++++++++++++++++-----------
|
||||
providers/hns/hns_roce_u_hw_v2.c | 4 ++--
|
||||
providers/hns/hns_roce_u_verbs.c | 6 ++++--
|
||||
3 files changed, 28 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 279c9b0..6e107af 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -108,7 +108,6 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe,
|
||||
struct ibv_wc *wc)
|
||||
{
|
||||
- fprintf(stderr, PFX "error cqe!\n");
|
||||
switch (roce_get_field(cqe->cqe_byte_4,
|
||||
CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
|
||||
CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) &
|
||||
@@ -176,7 +175,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq)
|
||||
static void *get_recv_wqe(struct hns_roce_qp *qp, int n)
|
||||
{
|
||||
if ((n < 0) || (n > qp->rq.wqe_cnt)) {
|
||||
- printf("rq wqe index:%d,rq wqe cnt:%d\r\n", n, qp->rq.wqe_cnt);
|
||||
+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
|
||||
+ "rq wqe index:%d,rq wqe cnt:%d\r\n", n,
|
||||
+ qp->rq.wqe_cnt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -186,7 +187,9 @@ static void *get_recv_wqe(struct hns_roce_qp *qp, int n)
|
||||
static void *get_send_wqe(struct hns_roce_qp *qp, int n)
|
||||
{
|
||||
if ((n < 0) || (n > qp->sq.wqe_cnt)) {
|
||||
- printf("sq wqe index:%d,sq wqe cnt:%d\r\n", n, qp->sq.wqe_cnt);
|
||||
+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
|
||||
+ "sq wqe index:%d,sq wqe cnt:%d\r\n", n,
|
||||
+ qp->sq.wqe_cnt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -207,8 +210,9 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
|
||||
cur = wq->head - wq->tail;
|
||||
pthread_spin_unlock(&cq->lock);
|
||||
|
||||
- printf("wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n",
|
||||
- wq->head, wq->tail, wq->max_post, nreq);
|
||||
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
+ "wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n",
|
||||
+ wq->head, wq->tail, wq->max_post, nreq);
|
||||
|
||||
return cur + nreq >= wq->max_post;
|
||||
}
|
||||
@@ -221,7 +225,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
|
||||
if (ctx->qp_table[tind].refcnt) {
|
||||
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
|
||||
} else {
|
||||
- printf("hns_roce_find_qp fail!\n");
|
||||
+ verbs_err(&ctx->ibv_ctx, "hns_roce_find_qp fail!\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@@ -273,7 +277,8 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
|
||||
*cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context),
|
||||
qpn & 0xffffff);
|
||||
if (!*cur_qp) {
|
||||
- fprintf(stderr, PFX "can't find qp!\n");
|
||||
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
+ PFX "can't find qp!\n");
|
||||
return CQ_POLL_ERR;
|
||||
}
|
||||
}
|
||||
@@ -312,6 +317,8 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
|
||||
if (roce_get_field(cqe->cqe_byte_4,
|
||||
CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
|
||||
CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) {
|
||||
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
+ PFX "error cqe!\n");
|
||||
hns_roce_handle_error_cqe(cqe, wc);
|
||||
return CQ_OK;
|
||||
}
|
||||
@@ -475,8 +482,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||
if (wr->num_sge > qp->sq.max_gs) {
|
||||
ret = -1;
|
||||
*bad_wr = wr;
|
||||
- printf("wr->num_sge(<=%d) = %d, check failed!\r\n",
|
||||
- qp->sq.max_gs, wr->num_sge);
|
||||
+ verbs_err(verbs_get_ctx(ibvqp->context),
|
||||
+ "wr->num_sge(<=%d) = %d, check failed!\r\n",
|
||||
+ qp->sq.max_gs, wr->num_sge);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -544,8 +552,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||
if (le32toh(ctrl->msg_length) > qp->max_inline_data) {
|
||||
ret = -1;
|
||||
*bad_wr = wr;
|
||||
- printf("inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n",
|
||||
- wr->send_flags, ctrl->msg_length);
|
||||
+ verbs_err(verbs_get_ctx(ibvqp->context),
|
||||
+ "inline data len(1-32)=%d, send_flags = 0x%x, check failed!\r\n",
|
||||
+ wr->send_flags, ctrl->msg_length);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -650,7 +659,8 @@ static int hns_roce_u_v1_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
|
||||
|
||||
if (!ret && (attr_mask & IBV_QP_PORT)) {
|
||||
hr_qp->port_num = attr->port_num;
|
||||
- printf("hr_qp->port_num= 0x%x\n", hr_qp->port_num);
|
||||
+ verbs_err(verbs_get_ctx(qp->context), "hr_qp->port_num= 0x%x\n",
|
||||
+ hr_qp->port_num);
|
||||
}
|
||||
|
||||
hr_qp->sl = attr->ah_attr.sl;
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 4c21720..d4b76b5 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -629,8 +629,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
|
||||
ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc, opcode);
|
||||
if (ret) {
|
||||
- fprintf(stderr,
|
||||
- PFX "failed to handle recv inline wqe!\n");
|
||||
+ verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
+ PFX "failed to handle recv inline wqe!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 2a9e880..8840a9d 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -164,12 +164,14 @@ struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
|
||||
struct ib_uverbs_reg_mr_resp resp;
|
||||
|
||||
if (!addr) {
|
||||
- fprintf(stderr, "2nd parm addr is NULL!\n");
|
||||
+ verbs_err(verbs_get_ctx(pd->context),
|
||||
+ "2nd parm addr is NULL!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!length) {
|
||||
- fprintf(stderr, "3st parm length is 0!\n");
|
||||
+ verbs_err(verbs_get_ctx(pd->context),
|
||||
+ "3st parm length is 0!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,34 +0,0 @@
|
||||
From 4780e0a4c8cf2112425d04b939825a30603d87e6 Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Tue, 9 Nov 2021 20:41:03 +0800
|
||||
Subject: libhns: The function declaration should be the same as the definition
|
||||
|
||||
The parameter names should be the same when the function is declared and
|
||||
defined.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 21a5a6b..a5aa469 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -372,9 +372,9 @@ int hns_roce_u_free_pd(struct ibv_pd *pd);
|
||||
|
||||
struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
|
||||
uint64_t hca_va, int access);
|
||||
-int hns_roce_u_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
|
||||
+int hns_roce_u_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
|
||||
void *addr, size_t length, int access);
|
||||
-int hns_roce_u_dereg_mr(struct verbs_mr *mr);
|
||||
+int hns_roce_u_dereg_mr(struct verbs_mr *vmr);
|
||||
|
||||
struct ibv_mw *hns_roce_u_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type);
|
||||
int hns_roce_u_dealloc_mw(struct ibv_mw *mw);
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,40 +0,0 @@
|
||||
From 46c810472a1a6e3e093c21b6bcd43af0a0eda10b Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Tue, 9 Nov 2021 20:41:02 +0800
|
||||
Subject: libhns: The content of the header file should be protected with
|
||||
#define
|
||||
|
||||
Header files should be protected with #define to prevent repeated
|
||||
inclusion.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_db.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
|
||||
index 13df9b5..ca056c3 100644
|
||||
--- a/providers/hns/hns_roce_u_db.h
|
||||
+++ b/providers/hns/hns_roce_u_db.h
|
||||
@@ -29,14 +29,14 @@
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
+#ifndef _HNS_ROCE_U_DB_H
|
||||
+#define _HNS_ROCE_U_DB_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <util/mmio.h>
|
||||
#include "hns_roce_u.h"
|
||||
|
||||
-#ifndef _HNS_ROCE_U_DB_H
|
||||
-#define _HNS_ROCE_U_DB_H
|
||||
|
||||
#define HNS_ROCE_WORD_NUM 2
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,124 +0,0 @@
|
||||
From dc29ea131407fbbe93497059b61e3ef22a675df1 Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Tue, 9 Nov 2021 20:41:01 +0800
|
||||
Subject: libhns: Fix wrong type of variables and fields
|
||||
|
||||
Some variables and fields should be in type of unsigned instead of signed.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 6 +++---
|
||||
providers/hns/hns_roce_u_hw_v1.c | 6 +++---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 11 +++++------
|
||||
3 files changed, 11 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index a5aa469..92dc26c 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -101,7 +101,7 @@
|
||||
#define roce_set_bit(origin, shift, val) \
|
||||
roce_set_field((origin), (1ul << (shift)), (shift), (val))
|
||||
|
||||
-#define hr_ilog32(n) ilog32((n) - 1)
|
||||
+#define hr_ilog32(n) ilog32((unsigned int)(n) - 1)
|
||||
|
||||
enum {
|
||||
HNS_ROCE_QP_TABLE_BITS = 8,
|
||||
@@ -205,7 +205,7 @@ struct hns_roce_cq {
|
||||
|
||||
struct hns_roce_idx_que {
|
||||
struct hns_roce_buf buf;
|
||||
- int entry_shift;
|
||||
+ unsigned int entry_shift;
|
||||
unsigned long *bitmap;
|
||||
int bitmap_cnt;
|
||||
unsigned int head;
|
||||
@@ -252,7 +252,7 @@ struct hns_roce_sge_info {
|
||||
struct hns_roce_sge_ex {
|
||||
int offset;
|
||||
unsigned int sge_cnt;
|
||||
- int sge_shift;
|
||||
+ unsigned int sge_shift;
|
||||
};
|
||||
|
||||
struct hns_roce_rinl_sge {
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 6e107af..838e004 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -220,7 +220,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
|
||||
static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
|
||||
uint32_t qpn)
|
||||
{
|
||||
- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
|
||||
if (ctx->qp_table[tind].refcnt) {
|
||||
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
|
||||
@@ -232,7 +232,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
|
||||
|
||||
static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
|
||||
{
|
||||
- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
|
||||
if (!--ctx->qp_table[tind].refcnt)
|
||||
free(ctx->qp_table[tind].table);
|
||||
@@ -740,7 +740,7 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
|
||||
struct ibv_recv_wr **bad_wr)
|
||||
{
|
||||
int ret = 0;
|
||||
- int nreq;
|
||||
+ unsigned int nreq;
|
||||
struct ibv_sge *sg;
|
||||
struct hns_roce_rc_rq_wqe *rq_wqe;
|
||||
struct hns_roce_qp *qp = to_hr_qp(ibvqp);
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index d4b76b5..d0df51a 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -248,7 +248,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
|
||||
return srq->wqe_buf.buf + (n << srq->wqe_shift);
|
||||
}
|
||||
|
||||
-static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
|
||||
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
|
||||
{
|
||||
return idx_que->buf.buf + (n << idx_que->entry_shift);
|
||||
}
|
||||
@@ -352,7 +352,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
|
||||
uint32_t qpn)
|
||||
{
|
||||
- int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
|
||||
if (ctx->qp_table[tind].refcnt)
|
||||
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
|
||||
@@ -982,9 +982,8 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp,
|
||||
- struct ibv_send_wr *wr, int nreq,
|
||||
- struct hns_roce_sge_info *sge_info)
|
||||
+static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
+ unsigned int nreq, struct hns_roce_sge_info *sge_info)
|
||||
{
|
||||
struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah);
|
||||
struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
|
||||
@@ -1140,7 +1139,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe,
|
||||
}
|
||||
|
||||
static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
- int nreq, struct hns_roce_sge_info *sge_info)
|
||||
+ unsigned int nreq, struct hns_roce_sge_info *sge_info)
|
||||
{
|
||||
struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
|
||||
struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
From 031ccf570369d820dab067cf29fb17e338cd4b28 Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Tue, 9 Nov 2021 20:41:00 +0800
|
||||
Subject: libhns: Fix wrong print format for unsigned type
|
||||
|
||||
Change the %d printf format to %u for unsigned int variables.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 8840a9d..923c005 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -73,7 +73,7 @@ int hns_roce_u_query_device(struct ibv_context *context,
|
||||
sub_minor = raw_fw_ver & 0xffff;
|
||||
|
||||
snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver),
|
||||
- "%d.%d.%03d", major, minor, sub_minor);
|
||||
+ "%u.%u.%03u", major, minor, sub_minor);
|
||||
|
||||
return 0;
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From e451dbaff5f0dd1715b6411169e970021cd43f4f Mon Sep 17 00:00:00 2001
|
||||
From: Yixing Liu <liuyixing1@huawei.com>
|
||||
Date: Tue, 9 Nov 2021 20:40:59 +0800
|
||||
Subject: libhns: Remove redundant variable initialization
|
||||
|
||||
The variable of owner_bit has been assigned before the reference, so there
|
||||
is no need to initialize.
|
||||
|
||||
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index d0df51a..5fb6477 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1399,9 +1399,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||
{
|
||||
int nfreed = 0;
|
||||
bool is_recv_cqe;
|
||||
+ uint8_t owner_bit;
|
||||
uint16_t wqe_index;
|
||||
uint32_t prod_index;
|
||||
- uint8_t owner_bit = 0;
|
||||
struct hns_roce_v2_cqe *cqe, *dest;
|
||||
struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From 21d81f659d801230a1ccf1aadf9b1ecba5a3ccd8 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Tue, 9 Nov 2021 20:40:57 +0800
|
||||
Subject: libhns: Remove unused macros
|
||||
|
||||
These macros were once needed but are no longer used, so they should be removed.
|
||||
|
||||
Fixes: 516b8d4e4ebe ("providers: Use the new match_device and allocate_device ops")
|
||||
Fixes: 887b78c80224 ("libhns: Add initial main frame")
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.c | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
|
||||
index 3b31ad3..9dc4905 100644
|
||||
--- a/providers/hns/hns_roce_u.c
|
||||
+++ b/providers/hns/hns_roce_u.c
|
||||
@@ -41,9 +41,6 @@
|
||||
|
||||
static void hns_roce_free_context(struct ibv_context *ibctx);
|
||||
|
||||
-#define HID_LEN 15
|
||||
-#define DEV_MATCH_LEN 128
|
||||
-
|
||||
#ifndef PCI_VENDOR_ID_HUAWEI
|
||||
#define PCI_VENDOR_ID_HUAWEI 0x19E5
|
||||
#endif
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,545 +0,0 @@
|
||||
From 0851ae661c4fe4dd285c22c6acce462fc8004b8d Mon Sep 17 00:00:00 2001
|
||||
From: Yixian Liu <liuyixian@huawei.com>
|
||||
Date: Thu, 18 Nov 2021 22:46:10 +0800
|
||||
Subject: libhns: Refactor the poll one interface
|
||||
|
||||
Mainly about:
|
||||
|
||||
1. Separate the differences between various objects (such as sq, rq, srq)
|
||||
into functions.
|
||||
2. Optimize function names, variable names, and comments to increase code
|
||||
readability.
|
||||
3. Use map instead of switch branch to simplify the code.
|
||||
|
||||
Signed-off-by: Yixian Liu <liuyixian@huawei.com>
|
||||
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 373 +++++++++++++++----------------
|
||||
providers/hns/hns_roce_u_hw_v2.h | 10 +-
|
||||
2 files changed, 189 insertions(+), 194 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 5fb6477..1b4e91b 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -154,59 +154,37 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void hns_roce_v2_handle_error_cqe(struct hns_roce_v2_cqe *cqe,
|
||||
- struct ibv_wc *wc)
|
||||
-{
|
||||
- unsigned int status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
|
||||
- CQE_BYTE_4_STATUS_S);
|
||||
- unsigned int cqe_status = status & HNS_ROCE_V2_CQE_STATUS_MASK;
|
||||
+static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
+ uint8_t status)
|
||||
+{
|
||||
+ static const struct {
|
||||
+ unsigned int cqe_status;
|
||||
+ enum ibv_wc_status wc_status;
|
||||
+ } map[] = {
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
|
||||
+ };
|
||||
|
||||
- switch (cqe_status) {
|
||||
- case HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR:
|
||||
- wc->status = IBV_WC_LOC_LEN_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR:
|
||||
- wc->status = IBV_WC_LOC_QP_OP_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_LOCAL_PROT_ERR:
|
||||
- wc->status = IBV_WC_LOC_PROT_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_WR_FLUSH_ERR:
|
||||
- wc->status = IBV_WC_WR_FLUSH_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR:
|
||||
- wc->status = IBV_WC_MW_BIND_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_BAD_RESP_ERR:
|
||||
- wc->status = IBV_WC_BAD_RESP_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR:
|
||||
- wc->status = IBV_WC_LOC_ACCESS_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR:
|
||||
- wc->status = IBV_WC_REM_INV_REQ_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR:
|
||||
- wc->status = IBV_WC_REM_ACCESS_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_REMOTE_OP_ERR:
|
||||
- wc->status = IBV_WC_REM_OP_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR:
|
||||
- wc->status = IBV_WC_RETRY_EXC_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR:
|
||||
- wc->status = IBV_WC_RNR_RETRY_EXC_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR:
|
||||
- wc->status = IBV_WC_REM_ABORT_ERR;
|
||||
- break;
|
||||
- case HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR:
|
||||
- wc->status = IBV_WC_REM_INV_RD_REQ_ERR;
|
||||
- break;
|
||||
- default:
|
||||
- wc->status = IBV_WC_GENERAL_ERR;
|
||||
- break;
|
||||
+ int i;
|
||||
+
|
||||
+ wc->status = IBV_WC_GENERAL_ERR;
|
||||
+ for (i = 0; i < ARRAY_SIZE(map); i++) {
|
||||
+ if (status == map[i].cqe_status) {
|
||||
+ wc->status = map[i].wc_status;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -268,6 +246,27 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
|
||||
pthread_spin_unlock(&srq->lock);
|
||||
}
|
||||
|
||||
+static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
|
||||
+ struct hns_roce_context *ctx,
|
||||
+ struct hns_roce_qp *hr_qp,
|
||||
+ struct hns_roce_srq **srq)
|
||||
+{
|
||||
+ uint32_t srqn;
|
||||
+
|
||||
+ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) {
|
||||
+ srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M,
|
||||
+ CQE_BYTE_12_XRC_SRQN_S);
|
||||
+
|
||||
+ *srq = hns_roce_find_srq(ctx, srqn);
|
||||
+ if (!*srq)
|
||||
+ return -EINVAL;
|
||||
+ } else if (hr_qp->verbs_qp.qp.srq) {
|
||||
+ *srq = to_hr_srq(hr_qp->verbs_qp.qp.srq);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
|
||||
struct hns_roce_cq *cq)
|
||||
{
|
||||
@@ -332,7 +331,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
|
||||
hns_roce_write512(qp->sq.db_reg, wqe);
|
||||
}
|
||||
|
||||
-static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
+static void update_cq_db(struct hns_roce_context *ctx,
|
||||
struct hns_roce_cq *cq)
|
||||
{
|
||||
struct hns_roce_db cq_db = {};
|
||||
@@ -378,19 +377,17 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
|
||||
static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
|
||||
int attr_mask);
|
||||
|
||||
-static int hns_roce_flush_cqe(struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
|
||||
+static int hns_roce_flush_cqe(struct hns_roce_qp *hr_qp, uint8_t status)
|
||||
{
|
||||
struct ibv_qp_attr attr;
|
||||
int attr_mask;
|
||||
|
||||
- if ((wc->status != IBV_WC_SUCCESS) &&
|
||||
- (wc->status != IBV_WC_WR_FLUSH_ERR)) {
|
||||
+ if (status != HNS_ROCE_V2_CQE_WR_FLUSH_ERR) {
|
||||
attr_mask = IBV_QP_STATE;
|
||||
attr.qp_state = IBV_QPS_ERR;
|
||||
- hns_roce_u_v2_modify_qp(&(*cur_qp)->verbs_qp.qp, &attr,
|
||||
- attr_mask);
|
||||
+ hns_roce_u_v2_modify_qp(&hr_qp->verbs_qp.qp, &attr, attr_mask);
|
||||
|
||||
- (*cur_qp)->verbs_qp.qp.state = IBV_QPS_ERR;
|
||||
+ hr_qp->verbs_qp.qp.state = IBV_QPS_ERR;
|
||||
}
|
||||
|
||||
return V2_CQ_OK;
|
||||
@@ -409,41 +406,6 @@ static const unsigned int wc_send_op_map[] = {
|
||||
[HNS_ROCE_SQ_OP_BIND_MW] = IBV_WC_BIND_MW,
|
||||
};
|
||||
|
||||
-static void hns_roce_v2_get_opcode_from_sender(struct hns_roce_v2_cqe *cqe,
|
||||
- struct ibv_wc *wc)
|
||||
-{
|
||||
- uint32_t opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
|
||||
- CQE_BYTE_4_OPCODE_S);
|
||||
-
|
||||
- switch (opcode) {
|
||||
- case HNS_ROCE_SQ_OP_SEND:
|
||||
- case HNS_ROCE_SQ_OP_SEND_WITH_INV:
|
||||
- case HNS_ROCE_SQ_OP_RDMA_WRITE:
|
||||
- case HNS_ROCE_SQ_OP_BIND_MW:
|
||||
- wc->wc_flags = 0;
|
||||
- break;
|
||||
- case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
|
||||
- case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
|
||||
- wc->wc_flags = IBV_WC_WITH_IMM;
|
||||
- break;
|
||||
- case HNS_ROCE_SQ_OP_LOCAL_INV:
|
||||
- wc->wc_flags = IBV_WC_WITH_INV;
|
||||
- break;
|
||||
- case HNS_ROCE_SQ_OP_RDMA_READ:
|
||||
- case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP:
|
||||
- case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD:
|
||||
- wc->wc_flags = 0;
|
||||
- wc->byte_len = le32toh(cqe->byte_cnt);
|
||||
- break;
|
||||
- default:
|
||||
- wc->status = IBV_WC_GENERAL_ERR;
|
||||
- wc->wc_flags = 0;
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- wc->opcode = wc_send_op_map[opcode];
|
||||
-}
|
||||
-
|
||||
static const unsigned int wc_rcv_op_map[] = {
|
||||
[HNS_ROCE_RECV_OP_RDMA_WRITE_IMM] = IBV_WC_RECV_RDMA_WITH_IMM,
|
||||
[HNS_ROCE_RECV_OP_SEND] = IBV_WC_RECV,
|
||||
@@ -451,9 +413,8 @@ static const unsigned int wc_rcv_op_map[] = {
|
||||
[HNS_ROCE_RECV_OP_SEND_WITH_INV] = IBV_WC_RECV,
|
||||
};
|
||||
|
||||
-static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe,
|
||||
- struct ibv_wc *wc,
|
||||
- uint32_t opcode)
|
||||
+static void get_opcode_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
+ uint32_t opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case HNS_ROCE_RECV_OP_SEND:
|
||||
@@ -476,9 +437,8 @@ static void hns_roce_v2_get_opcode_from_receiver(struct hns_roce_v2_cqe *cqe,
|
||||
wc->opcode = wc_rcv_op_map[opcode];
|
||||
}
|
||||
|
||||
-static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
|
||||
- struct hns_roce_qp **cur_qp,
|
||||
- struct ibv_wc *wc, uint32_t opcode)
|
||||
+static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
+ struct hns_roce_qp **cur_qp, uint32_t opcode)
|
||||
{
|
||||
if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) &&
|
||||
(opcode == HNS_ROCE_RECV_OP_SEND ||
|
||||
@@ -521,26 +481,117 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
|
||||
return V2_CQ_OK;
|
||||
}
|
||||
|
||||
+static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc)
|
||||
+{
|
||||
+ wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M,
|
||||
+ CQE_BYTE_32_SL_S);
|
||||
+ wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M,
|
||||
+ CQE_BYTE_32_RMT_QPN_S);
|
||||
+ wc->slid = 0;
|
||||
+ wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ?
|
||||
+ IBV_WC_GRH : 0;
|
||||
+ wc->pkey_index = 0;
|
||||
+}
|
||||
+
|
||||
+static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
+ struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ uint32_t wqe_idx;
|
||||
+
|
||||
+ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
|
||||
+ CQE_BYTE_4_WQE_IDX_S);
|
||||
+ wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
|
||||
+ hns_roce_free_srq_wqe(srq, wqe_idx);
|
||||
+}
|
||||
+
|
||||
+static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
+ struct hns_roce_qp *hr_qp, uint8_t opcode)
|
||||
+{
|
||||
+ struct hns_roce_wq *wq;
|
||||
+ int ret;
|
||||
+
|
||||
+ wq = &hr_qp->rq;
|
||||
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
+ ++wq->tail;
|
||||
+
|
||||
+ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
|
||||
+ parse_for_ud_qp(cqe, wc);
|
||||
+
|
||||
+ ret = handle_recv_inl_wqe(cqe, wc, &hr_qp, opcode);
|
||||
+ if (ret) {
|
||||
+ verbs_err(verbs_get_ctx(hr_qp->verbs_qp.qp.context),
|
||||
+ PFX "failed to handle recv inline wqe!\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
+ struct hns_roce_qp *hr_qp, uint8_t opcode)
|
||||
+{
|
||||
+ struct hns_roce_wq *wq;
|
||||
+ uint32_t wqe_idx;
|
||||
+
|
||||
+ wq = &hr_qp->sq;
|
||||
+ /*
|
||||
+ * in case of signalling, the tail pointer needs to be updated
|
||||
+ * according to the wqe idx in the current cqe first
|
||||
+ */
|
||||
+ if (hr_qp->sq_signal_bits) {
|
||||
+ wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
|
||||
+ CQE_BYTE_4_WQE_IDX_S);
|
||||
+ /* get the processed wqes num since last signalling */
|
||||
+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
|
||||
+ }
|
||||
+ /* write the wr_id of wq into the wc */
|
||||
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
+ ++wq->tail;
|
||||
+
|
||||
+ switch (opcode) {
|
||||
+ case HNS_ROCE_SQ_OP_SEND:
|
||||
+ case HNS_ROCE_SQ_OP_SEND_WITH_INV:
|
||||
+ case HNS_ROCE_SQ_OP_RDMA_WRITE:
|
||||
+ case HNS_ROCE_SQ_OP_BIND_MW:
|
||||
+ wc->wc_flags = 0;
|
||||
+ break;
|
||||
+ case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
|
||||
+ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
|
||||
+ wc->wc_flags = IBV_WC_WITH_IMM;
|
||||
+ break;
|
||||
+ case HNS_ROCE_SQ_OP_LOCAL_INV:
|
||||
+ wc->wc_flags = IBV_WC_WITH_INV;
|
||||
+ break;
|
||||
+ case HNS_ROCE_SQ_OP_RDMA_READ:
|
||||
+ case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP:
|
||||
+ case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD:
|
||||
+ wc->wc_flags = 0;
|
||||
+ wc->byte_len = le32toh(cqe->byte_cnt);
|
||||
+ break;
|
||||
+ default:
|
||||
+ wc->status = IBV_WC_GENERAL_ERR;
|
||||
+ wc->wc_flags = 0;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ wc->opcode = wc_send_op_map[opcode];
|
||||
+}
|
||||
+
|
||||
static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
|
||||
{
|
||||
struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
|
||||
struct hns_roce_srq *srq = NULL;
|
||||
- struct hns_roce_wq *wq = NULL;
|
||||
struct hns_roce_v2_cqe *cqe;
|
||||
- uint16_t wqe_ctr;
|
||||
- uint32_t opcode;
|
||||
- uint32_t srqn;
|
||||
+ uint8_t opcode;
|
||||
+ uint8_t status;
|
||||
uint32_t qpn;
|
||||
- int is_send;
|
||||
- int ret;
|
||||
+ bool is_send;
|
||||
|
||||
- /* According to CI, find the relative cqe */
|
||||
cqe = next_cqe_sw_v2(cq);
|
||||
if (!cqe)
|
||||
return V2_CQ_EMPTY;
|
||||
|
||||
- /* Get the next cqe, CI will be added gradually */
|
||||
++cq->cons_index;
|
||||
|
||||
udma_from_device_barrier();
|
||||
@@ -548,102 +599,48 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M,
|
||||
CQE_BYTE_16_LCL_QPN_S);
|
||||
|
||||
- is_send = (roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) ==
|
||||
- HNS_ROCE_V2_CQE_IS_SQ);
|
||||
-
|
||||
- /* if qp is zero, it will not get the correct qpn */
|
||||
+ /* if cur qp is null, then could not get the correct qpn */
|
||||
if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) {
|
||||
*cur_qp = hns_roce_v2_find_qp(ctx, qpn);
|
||||
if (!*cur_qp)
|
||||
return V2_CQ_POLL_ERR;
|
||||
}
|
||||
- wc->qp_num = qpn;
|
||||
|
||||
- if ((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) {
|
||||
- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M,
|
||||
- CQE_BYTE_12_XRC_SRQN_S);
|
||||
+ status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
|
||||
+ CQE_BYTE_4_STATUS_S);
|
||||
+ opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
|
||||
+ CQE_BYTE_4_OPCODE_S);
|
||||
+ is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ;
|
||||
+ if (is_send) {
|
||||
+ parse_cqe_for_req(cqe, wc, *cur_qp, opcode);
|
||||
+ } else {
|
||||
+ wc->byte_len = le32toh(cqe->byte_cnt);
|
||||
+ get_opcode_for_resp(cqe, wc, opcode);
|
||||
|
||||
- srq = hns_roce_find_srq(ctx, srqn);
|
||||
- if (!srq)
|
||||
+ if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq))
|
||||
return V2_CQ_POLL_ERR;
|
||||
- } else if ((*cur_qp)->verbs_qp.qp.srq) {
|
||||
- srq = to_hr_srq((*cur_qp)->verbs_qp.qp.srq);
|
||||
- }
|
||||
|
||||
- if (is_send) {
|
||||
- wq = &(*cur_qp)->sq;
|
||||
- /*
|
||||
- * if sq_signal_bits is 1, the tail pointer first update to
|
||||
- * the wqe corresponding the current cqe
|
||||
- */
|
||||
- if ((*cur_qp)->sq_signal_bits) {
|
||||
- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4,
|
||||
- CQE_BYTE_4_WQE_IDX_M,
|
||||
- CQE_BYTE_4_WQE_IDX_S));
|
||||
- /*
|
||||
- * wq->tail will plus a positive number every time,
|
||||
- * when wq->tail exceeds 32b, it is 0 and acc
|
||||
- */
|
||||
- wq->tail += (wqe_ctr - (uint16_t) wq->tail) &
|
||||
- (wq->wqe_cnt - 1);
|
||||
+ if (srq) {
|
||||
+ parse_cqe_for_srq(cqe, wc, srq);
|
||||
+ } else {
|
||||
+ if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode))
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
}
|
||||
- /* write the wr_id of wq into the wc */
|
||||
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
- ++wq->tail;
|
||||
- } else if (srq) {
|
||||
- wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4,
|
||||
- CQE_BYTE_4_WQE_IDX_M,
|
||||
- CQE_BYTE_4_WQE_IDX_S));
|
||||
- wc->wr_id = srq->wrid[wqe_ctr & (srq->wqe_cnt - 1)];
|
||||
- hns_roce_free_srq_wqe(srq, wqe_ctr);
|
||||
- } else {
|
||||
- wq = &(*cur_qp)->rq;
|
||||
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
- ++wq->tail;
|
||||
}
|
||||
|
||||
+ wc->qp_num = qpn;
|
||||
+
|
||||
/*
|
||||
- * HW maintains wc status, set the err type and directly return, after
|
||||
- * generated the incorrect CQE
|
||||
+ * once a cqe in error status, the driver needs to help the HW to
|
||||
+ * generated flushed cqes for all subsequent wqes
|
||||
*/
|
||||
- if (roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
|
||||
- CQE_BYTE_4_STATUS_S) != HNS_ROCE_V2_CQE_SUCCESS) {
|
||||
- hns_roce_v2_handle_error_cqe(cqe, wc);
|
||||
- return hns_roce_flush_cqe(cur_qp, wc);
|
||||
+ if (status != HNS_ROCE_V2_CQE_SUCCESS) {
|
||||
+ handle_error_cqe(cqe, wc, status);
|
||||
+ return hns_roce_flush_cqe(*cur_qp, status);
|
||||
}
|
||||
|
||||
wc->status = IBV_WC_SUCCESS;
|
||||
|
||||
- /*
|
||||
- * According to the opcode type of cqe, mark the opcode and other
|
||||
- * information of wc
|
||||
- */
|
||||
- if (is_send) {
|
||||
- hns_roce_v2_get_opcode_from_sender(cqe, wc);
|
||||
- } else {
|
||||
- /* Get opcode and flag in rq&srq */
|
||||
- wc->byte_len = le32toh(cqe->byte_cnt);
|
||||
- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
|
||||
- CQE_BYTE_4_OPCODE_S) & HNS_ROCE_V2_CQE_OPCODE_MASK;
|
||||
- hns_roce_v2_get_opcode_from_receiver(cqe, wc, opcode);
|
||||
-
|
||||
- ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc, opcode);
|
||||
- if (ret) {
|
||||
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
- PFX "failed to handle recv inline wqe!\n");
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
- wc->sl = (uint8_t)roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M,
|
||||
- CQE_BYTE_32_SL_S);
|
||||
- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M,
|
||||
- CQE_BYTE_32_RMT_QPN_S);
|
||||
- wc->slid = 0;
|
||||
- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ?
|
||||
- IBV_WC_GRH : 0;
|
||||
- wc->pkey_index = 0;
|
||||
- }
|
||||
-
|
||||
return V2_CQ_OK;
|
||||
}
|
||||
|
||||
@@ -668,7 +665,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
|
||||
*cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M;
|
||||
else
|
||||
- hns_roce_v2_update_cq_cons_index(ctx, cq);
|
||||
+ update_cq_db(ctx, cq);
|
||||
}
|
||||
|
||||
pthread_spin_unlock(&cq->lock);
|
||||
@@ -1438,7 +1435,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||
if (nfreed) {
|
||||
cq->cons_index += nfreed;
|
||||
udma_to_device_barrier();
|
||||
- hns_roce_v2_update_cq_cons_index(ctx, cq);
|
||||
+ update_cq_db(ctx, cq);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index af72cd7..51a1df4 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -33,7 +33,10 @@
|
||||
#ifndef _HNS_ROCE_U_HW_V2_H
|
||||
#define _HNS_ROCE_U_HW_V2_H
|
||||
|
||||
-#define HNS_ROCE_V2_CQE_IS_SQ 0
|
||||
+enum {
|
||||
+ CQE_FOR_SQ,
|
||||
+ CQE_FOR_RQ,
|
||||
+};
|
||||
|
||||
#define HNS_ROCE_V2_CQ_DB_REQ_SOL 1
|
||||
#define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0
|
||||
@@ -94,11 +97,6 @@ enum {
|
||||
V2_CQ_POLL_ERR = -2,
|
||||
};
|
||||
|
||||
-enum {
|
||||
- HNS_ROCE_V2_CQE_STATUS_MASK = 0xff,
|
||||
- HNS_ROCE_V2_CQE_OPCODE_MASK = 0x1f,
|
||||
-};
|
||||
-
|
||||
enum {
|
||||
HNS_ROCE_V2_CQE_SUCCESS = 0x00,
|
||||
HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR = 0x01,
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
From 72f495e542c1c458e71fd6971f412edec41830e1 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Wed, 24 Nov 2021 19:03:54 +0800
|
||||
Subject: libhns: hr ilog32() should be represented by a function instead of a
|
||||
macro
|
||||
|
||||
The compiler will then check whether the arguments of the function are of the
|
||||
correct type.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 92dc26c..c1ae1c9 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -101,8 +101,6 @@
|
||||
#define roce_set_bit(origin, shift, val) \
|
||||
roce_set_field((origin), (1ul << (shift)), (shift), (val))
|
||||
|
||||
-#define hr_ilog32(n) ilog32((unsigned int)(n) - 1)
|
||||
-
|
||||
enum {
|
||||
HNS_ROCE_QP_TABLE_BITS = 8,
|
||||
HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS,
|
||||
@@ -326,6 +324,11 @@ static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
|
||||
return hr_hw_page_align(count << buf_shift);
|
||||
}
|
||||
|
||||
+static inline unsigned int hr_ilog32(unsigned int count)
|
||||
+{
|
||||
+ return ilog32(count - 1);
|
||||
+}
|
||||
+
|
||||
static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev)
|
||||
{
|
||||
return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device);
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
From 61911051eec0f984537c2762208b8ecbc875d5d3 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Fri, 26 Nov 2021 16:53:18 +0800
|
||||
Subject: libhns: Fix the size setting error when copying CQE in clean cq()
|
||||
|
||||
The size of CQE is different for different versions of hardware, so the
|
||||
driver needs to specify the size of CQE explicitly.
|
||||
|
||||
Fixes: 3546e6b69ac8 ("libhns: Add support for CQE in size of 64 Bytes")
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 1b4e91b..b13b6dc 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1426,7 +1426,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||
(prod_index + nfreed) & cq->ibv_cq.cqe);
|
||||
owner_bit = roce_get_bit(dest->byte_4,
|
||||
CQE_BYTE_4_OWNER_S);
|
||||
- memcpy(dest, cqe, sizeof(*cqe));
|
||||
+ memcpy(dest, cqe, cq->cqe_size);
|
||||
roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S,
|
||||
owner_bit);
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
From 8fbb85bae3fd2632da80e77d02bbbe73aac85f88 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Fri, 26 Nov 2021 17:55:32 +0800
|
||||
Subject: libhns: Fix the problem that XRC does not need to create RQ
|
||||
|
||||
XRC QP does not require RQ, so RQ should not be created.
|
||||
|
||||
Fixes: 4ed874a5cf30 ("libhns: Add support for XRC for HIP09")
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 13 ++++++++++++-
|
||||
1 file changed, 12 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 923c005..557d075 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -774,12 +774,22 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr)
|
||||
+{
|
||||
+ if (attr->qp_type == IBV_QPT_XRC_SEND ||
|
||||
+ attr->qp_type == IBV_QPT_XRC_RECV || attr->srq)
|
||||
+ return 0;
|
||||
+
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
static int verify_qp_create_cap(struct hns_roce_context *ctx,
|
||||
struct ibv_qp_init_attr_ex *attr)
|
||||
{
|
||||
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
|
||||
struct ibv_qp_cap *cap = &attr->cap;
|
||||
uint32_t min_wqe_num;
|
||||
+ int has_rq;
|
||||
|
||||
if (!cap->max_send_wr && attr->qp_type != IBV_QPT_XRC_RECV)
|
||||
return -EINVAL;
|
||||
@@ -790,7 +800,8 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
|
||||
cap->max_recv_sge > ctx->max_sge)
|
||||
return -EINVAL;
|
||||
|
||||
- if (attr->srq) {
|
||||
+ has_rq = hns_roce_qp_has_rq(attr);
|
||||
+ if (!has_rq) {
|
||||
cap->max_recv_wr = 0;
|
||||
cap->max_recv_sge = 0;
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
From 29fd05367349c7909949206a13092031b689eca7 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Tue, 30 Nov 2021 20:46:14 +0800
|
||||
Subject: libhns: Add vendor_err information for error WC
|
||||
|
||||
ULP can get more error information about the CQ through verbs.
|
||||
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 3 +++
|
||||
providers/hns/hns_roce_u_hw_v2.h | 3 +++
|
||||
2 files changed, 6 insertions(+)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index b13b6dc..18399e9 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -186,6 +186,9 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
break;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M,
|
||||
+ CQE_BYTE_16_SUB_STATUS_S);
|
||||
}
|
||||
|
||||
static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index 51a1df4..014cb8c 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -184,6 +184,9 @@ struct hns_roce_v2_cqe {
|
||||
#define CQE_BYTE_16_LCL_QPN_S 0
|
||||
#define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S)
|
||||
|
||||
+#define CQE_BYTE_16_SUB_STATUS_S 24
|
||||
+#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S)
|
||||
+
|
||||
#define CQE_BYTE_28_SMAC_S 0
|
||||
#define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S)
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,69 +0,0 @@
|
||||
From 46548879b84e8c502198a549d82ec079ebc8b9a0 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Thu, 2 Dec 2021 21:44:26 +0800
|
||||
Subject: libhns: Forcibly rewrite the inline flag of WQE
|
||||
|
||||
When a non-inline WR reuses a WQE that was used for inline the last time,
|
||||
the remaining inline flag should be cleared.
|
||||
|
||||
Fixes: cbdf5e32a855 ("libhns: Reimplement verbs of post_send and post_recv for hip08 RoCE")
|
||||
Fixes: 82fc508a6625 ("libhns: Add support for UD inline")
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 16 +++++++---------
|
||||
1 file changed, 7 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 18399e9..4eaa929 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -876,8 +876,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
if (!check_inl_data_len(qp, sge_info->total_len))
|
||||
return -EINVAL;
|
||||
|
||||
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S, 1);
|
||||
-
|
||||
if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) {
|
||||
roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx,
|
||||
UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0);
|
||||
@@ -993,6 +991,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
!!(wr->send_flags & IBV_SEND_SIGNALED));
|
||||
roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S,
|
||||
!!(wr->send_flags & IBV_SEND_SOLICITED));
|
||||
+ roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S,
|
||||
+ !!(wr->send_flags & IBV_SEND_INLINE));
|
||||
|
||||
ret = check_ud_opcode(ud_sq_wqe, wr);
|
||||
if (ret)
|
||||
@@ -1044,8 +1044,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
|
||||
dseg += sizeof(struct hns_roce_rc_sq_wqe);
|
||||
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, 1);
|
||||
-
|
||||
if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) {
|
||||
roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S,
|
||||
0);
|
||||
@@ -1150,13 +1148,13 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
return ret;
|
||||
|
||||
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
|
||||
- (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0);
|
||||
-
|
||||
+ !!(wr->send_flags & IBV_SEND_SIGNALED));
|
||||
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
|
||||
- (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0);
|
||||
-
|
||||
+ !!(wr->send_flags & IBV_SEND_FENCE));
|
||||
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S,
|
||||
- (wr->send_flags & IBV_SEND_SOLICITED) ? 1 : 0);
|
||||
+ !!(wr->send_flags & IBV_SEND_SOLICITED));
|
||||
+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S,
|
||||
+ !!(wr->send_flags & IBV_SEND_INLINE));
|
||||
|
||||
roce_set_field(rc_sq_wqe->byte_20,
|
||||
RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
From 2194680136400d6a5f12298ff4993fa6f51c2e10 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Wed, 8 Dec 2021 19:03:56 +0800
|
||||
Subject: libhns: Forcibly rewrite the strong-order flag of WQE
|
||||
|
||||
The Local Invalidate operation sets the SO (strong-order) flag; all other operations must clear it.
|
||||
|
||||
Fixes: a9ae7e9bfb5d ("libhns: Add local invalidate MR support for hip08")
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 9 +++++----
|
||||
1 file changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 4eaa929..cf871ab 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1143,10 +1143,6 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
int ret;
|
||||
|
||||
- ret = check_rc_opcode(rc_sq_wqe, wr);
|
||||
- if (ret)
|
||||
- return ret;
|
||||
-
|
||||
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
|
||||
!!(wr->send_flags & IBV_SEND_SIGNALED));
|
||||
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
|
||||
@@ -1155,6 +1151,11 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
!!(wr->send_flags & IBV_SEND_SOLICITED));
|
||||
roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S,
|
||||
!!(wr->send_flags & IBV_SEND_INLINE));
|
||||
+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0);
|
||||
+
|
||||
+ ret = check_rc_opcode(rc_sq_wqe, wr);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
|
||||
roce_set_field(rc_sq_wqe->byte_20,
|
||||
RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,32 +0,0 @@
|
||||
From 2a2e3ece2ff801e8d8e4915a56fe3fff8399d6a0 Mon Sep 17 00:00:00 2001
|
||||
From: Firas Jahjah <firasj@amazon.com>
|
||||
Date: Tue, 28 Dec 2021 15:58:37 +0200
|
||||
Subject: util: Fix mmio memcpy on ARM
|
||||
|
||||
The below commit added a new implementation of mmio_memcpy_x64() for
|
||||
ARM which was broken. The destination buffer must be advanced so we
|
||||
don't copy to the same 64 bytes.
|
||||
|
||||
Fixes: 159933c37 ("libhns: Add support for direct wqe")
|
||||
Reviewed-by: Daniel Kranzdorf <dkkranzd@amazon.com>
|
||||
Reviewed-by: Yossi Leybovich <sleybo@amazon.com>
|
||||
Signed-off-by: Firas Jahjah <firasj@amazon.com>
|
||||
---
|
||||
util/mmio.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/util/mmio.h b/util/mmio.h
|
||||
index 01d1455..5974058 100644
|
||||
--- a/util/mmio.h
|
||||
+++ b/util/mmio.h
|
||||
@@ -225,6 +225,7 @@ static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
|
||||
_mmio_memcpy_x64_64b(dest, src);
|
||||
bytecnt -= sizeof(uint64x2x4_t);
|
||||
src += sizeof(uint64x2x4_t);
|
||||
+ dest += sizeof(uint64x2x4_t);
|
||||
} while (bytecnt > 0);
|
||||
}
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,550 +0,0 @@
|
||||
From 532c4b6babe97e3023a049f1c6bd8a8e3ad95140 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Sat, 25 Dec 2021 17:42:55 +0800
|
||||
Subject: libhns: Use new interfaces hr reg ***() to operate the WQE field
|
||||
|
||||
Use hr_reg_xxx() to simplify the code for filling fields.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 170 ++++++++++------------------
|
||||
providers/hns/hns_roce_u_hw_v2.h | 184 ++++++++++++++-----------------
|
||||
2 files changed, 144 insertions(+), 210 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index cf871ab..0cff12b 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -323,13 +323,10 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
|
||||
struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
|
||||
|
||||
/* All kinds of DirectWQE have the same header field layout */
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1);
|
||||
- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M,
|
||||
- RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl);
|
||||
- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M,
|
||||
- RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT);
|
||||
- roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
|
||||
- RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
|
||||
+ hr_reg_enable(rc_sq_wqe, RCWQE_FLAG);
|
||||
+ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_L, qp->sl);
|
||||
+ hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT);
|
||||
+ hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head);
|
||||
|
||||
hns_roce_write512(qp->sq.db_reg, wqe);
|
||||
}
|
||||
@@ -834,29 +831,15 @@ static void fill_ud_inn_inl_data(const struct ibv_send_wr *wr,
|
||||
tmp += wr->sg_list[i].length;
|
||||
}
|
||||
|
||||
- roce_set_field(ud_sq_wqe->msg_len,
|
||||
- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M,
|
||||
- UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S,
|
||||
- *loc & 0xffff);
|
||||
-
|
||||
- roce_set_field(ud_sq_wqe->sge_num_pd,
|
||||
- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M,
|
||||
- UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S,
|
||||
- (*loc >> 16) & 0xff);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_15_0, *loc & 0xffff);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_23_16, (*loc >> 16) & 0xff);
|
||||
|
||||
tmp_data = *loc >> 24;
|
||||
loc++;
|
||||
tmp_data |= ((*loc & 0xffff) << 8);
|
||||
|
||||
- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx,
|
||||
- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M,
|
||||
- UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S,
|
||||
- tmp_data);
|
||||
-
|
||||
- roce_set_field(ud_sq_wqe->udpspn_rsv,
|
||||
- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M,
|
||||
- UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S,
|
||||
- *loc >> 16);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_47_24, tmp_data);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_INLINE_DATA_63_48, *loc >> 16);
|
||||
}
|
||||
|
||||
static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
|
||||
@@ -877,13 +860,11 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
return -EINVAL;
|
||||
|
||||
if (sge_info->total_len <= HNS_ROCE_MAX_UD_INL_INN_SZ) {
|
||||
- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx,
|
||||
- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 0);
|
||||
+ hr_reg_clear(ud_sq_wqe, UDWQE_INLINE_TYPE);
|
||||
|
||||
fill_ud_inn_inl_data(wr, ud_sq_wqe);
|
||||
} else {
|
||||
- roce_set_bit(ud_sq_wqe->rsv_msg_start_sge_idx,
|
||||
- UD_SQ_WQE_BYTE_20_INL_TYPE_S, 1);
|
||||
+ hr_reg_enable(ud_sq_wqe, UDWQE_INLINE_TYPE);
|
||||
|
||||
ret = fill_ext_sge_inl_data(qp, wr, sge_info);
|
||||
if (ret)
|
||||
@@ -891,8 +872,7 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
|
||||
sge_info->valid_num = sge_info->start_idx - sge_idx;
|
||||
|
||||
- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M,
|
||||
- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -919,8 +899,7 @@ static int check_ud_opcode(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
||||
|
||||
ud_sq_wqe->immtdata = get_immtdata(ib_op, wr);
|
||||
|
||||
- roce_set_field(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OPCODE_M,
|
||||
- UD_SQ_WQE_OPCODE_S, to_hr_opcode(ib_op));
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_OPCODE, to_hr_opcode(ib_op));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -931,24 +910,12 @@ static int fill_ud_av(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
||||
if (unlikely(ah->av.sl > MAX_SERVICE_LEVEL))
|
||||
return EINVAL;
|
||||
|
||||
- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_SL_M,
|
||||
- UD_SQ_WQE_SL_S, ah->av.sl);
|
||||
-
|
||||
- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_PD_M,
|
||||
- UD_SQ_WQE_PD_S, to_hr_pd(ah->ibv_ah.pd)->pdn);
|
||||
-
|
||||
- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_TCLASS_M,
|
||||
- UD_SQ_WQE_TCLASS_S, ah->av.tclass);
|
||||
-
|
||||
- roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_HOPLIMIT_M,
|
||||
- UD_SQ_WQE_HOPLIMIT_S, ah->av.hop_limit);
|
||||
-
|
||||
- roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_FLOW_LABEL_M,
|
||||
- UD_SQ_WQE_FLOW_LABEL_S, ah->av.flowlabel);
|
||||
-
|
||||
- roce_set_field(ud_sq_wqe->udpspn_rsv, UD_SQ_WQE_UDP_SPN_M,
|
||||
- UD_SQ_WQE_UDP_SPN_S, ah->av.udp_sport);
|
||||
-
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_SL, ah->av.sl);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_PD, to_hr_pd(ah->ibv_ah.pd)->pdn);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_TCLASS, ah->av.tclass);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_HOPLIMIT, ah->av.hop_limit);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_FLOW_LABEL, ah->av.flowlabel);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_UDPSPN, ah->av.udp_sport);
|
||||
memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN);
|
||||
ud_sq_wqe->sgid_index = ah->av.gid_index;
|
||||
memcpy(ud_sq_wqe->dgid, ah->av.dgid, HNS_ROCE_GID_SIZE);
|
||||
@@ -962,17 +929,14 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
- roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx,
|
||||
- UD_SQ_WQE_MSG_START_SGE_IDX_M,
|
||||
- UD_SQ_WQE_MSG_START_SGE_IDX_S,
|
||||
- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_MSG_START_SGE_IDX,
|
||||
+ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
|
||||
|
||||
set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
|
||||
|
||||
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
|
||||
|
||||
- roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M,
|
||||
- UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num);
|
||||
|
||||
if (wr->send_flags & IBV_SEND_INLINE)
|
||||
ret = set_ud_inl(qp, wr, ud_sq_wqe, sge_info);
|
||||
@@ -987,12 +951,12 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
|
||||
int ret = 0;
|
||||
|
||||
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_CQE_S,
|
||||
- !!(wr->send_flags & IBV_SEND_SIGNALED));
|
||||
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S,
|
||||
- !!(wr->send_flags & IBV_SEND_SOLICITED));
|
||||
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_BYTE_4_INL_S,
|
||||
- !!(wr->send_flags & IBV_SEND_INLINE));
|
||||
+ hr_reg_write_bool(ud_sq_wqe, UDWQE_CQE,
|
||||
+ !!(wr->send_flags & IBV_SEND_SIGNALED));
|
||||
+ hr_reg_write_bool(ud_sq_wqe, UDWQE_SE,
|
||||
+ !!(wr->send_flags & IBV_SEND_SOLICITED));
|
||||
+ hr_reg_write_bool(ud_sq_wqe, UDWQE_INLINE,
|
||||
+ !!(wr->send_flags & IBV_SEND_INLINE));
|
||||
|
||||
ret = check_ud_opcode(ud_sq_wqe, wr);
|
||||
if (ret)
|
||||
@@ -1001,8 +965,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
ud_sq_wqe->qkey = htole32(wr->wr.ud.remote_qkey & 0x80000000 ?
|
||||
qp->qkey : wr->wr.ud.remote_qkey);
|
||||
|
||||
- roce_set_field(ud_sq_wqe->rsv_dqpn, UD_SQ_WQE_DQPN_M,
|
||||
- UD_SQ_WQE_DQPN_S, wr->wr.ud.remote_qpn);
|
||||
+ hr_reg_write(ud_sq_wqe, UDWQE_DQPN, wr->wr.ud.remote_qpn);
|
||||
|
||||
ret = fill_ud_av(ud_sq_wqe, ah);
|
||||
if (ret)
|
||||
@@ -1021,8 +984,8 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB)
|
||||
udma_to_device_barrier();
|
||||
|
||||
- roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OWNER_S,
|
||||
- ~((qp->sq.head + nreq) >> qp->sq.shift));
|
||||
+ hr_reg_write_bool(wqe, RCWQE_OWNER,
|
||||
+ !((qp->sq.head + nreq) & BIT(qp->sq.shift)));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1045,8 +1008,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
dseg += sizeof(struct hns_roce_rc_sq_wqe);
|
||||
|
||||
if (sge_info->total_len <= HNS_ROCE_MAX_RC_INL_INN_SZ) {
|
||||
- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S,
|
||||
- 0);
|
||||
+ hr_reg_clear(rc_sq_wqe, RCWQE_INLINE_TYPE);
|
||||
|
||||
for (i = 0; i < wr->num_sge; i++) {
|
||||
memcpy(dseg, (void *)(uintptr_t)(wr->sg_list[i].addr),
|
||||
@@ -1054,8 +1016,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
dseg += wr->sg_list[i].length;
|
||||
}
|
||||
} else {
|
||||
- roce_set_bit(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_INL_TYPE_S,
|
||||
- 1);
|
||||
+ hr_reg_enable(rc_sq_wqe, RCWQE_INLINE_TYPE);
|
||||
|
||||
ret = fill_ext_sge_inl_data(qp, wr, sge_info);
|
||||
if (ret)
|
||||
@@ -1063,9 +1024,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
|
||||
sge_info->valid_num = sge_info->start_idx - sge_idx;
|
||||
|
||||
- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M,
|
||||
- RC_SQ_WQE_BYTE_16_SGE_NUM_S,
|
||||
- sge_info->valid_num);
|
||||
+ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1074,17 +1033,16 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
|
||||
const struct ibv_send_wr *wr)
|
||||
{
|
||||
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_MW_TYPE_S,
|
||||
- wr->bind_mw.mw->type - 1);
|
||||
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_ATOMIC_S,
|
||||
- (wr->bind_mw.bind_info.mw_access_flags &
|
||||
- IBV_ACCESS_REMOTE_ATOMIC) ? 1 : 0);
|
||||
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_READ_S,
|
||||
- (wr->bind_mw.bind_info.mw_access_flags &
|
||||
- IBV_ACCESS_REMOTE_READ) ? 1 : 0);
|
||||
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_WRITE_S,
|
||||
- (wr->bind_mw.bind_info.mw_access_flags &
|
||||
- IBV_ACCESS_REMOTE_WRITE) ? 1 : 0);
|
||||
+ unsigned int access = wr->bind_mw.bind_info.mw_access_flags;
|
||||
+
|
||||
+ hr_reg_write_bool(wqe, RCWQE_MW_TYPE, wr->bind_mw.mw->type - 1);
|
||||
+ hr_reg_write_bool(wqe, RCWQE_MW_RA_EN,
|
||||
+ !!(access & IBV_ACCESS_REMOTE_ATOMIC));
|
||||
+ hr_reg_write_bool(wqe, RCWQE_MW_RR_EN,
|
||||
+ !!(access & IBV_ACCESS_REMOTE_READ));
|
||||
+ hr_reg_write_bool(wqe, RCWQE_MW_RW_EN,
|
||||
+ !!(access & IBV_ACCESS_REMOTE_WRITE));
|
||||
+
|
||||
wqe->new_rkey = htole32(wr->bind_mw.rkey);
|
||||
wqe->byte_16 = htole32(wr->bind_mw.bind_info.length &
|
||||
HNS_ROCE_ADDRESS_MASK);
|
||||
@@ -1117,7 +1075,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe,
|
||||
wqe->va = htole64(wr->wr.atomic.remote_addr);
|
||||
break;
|
||||
case IBV_WR_LOCAL_INV:
|
||||
- roce_set_bit(wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 1);
|
||||
+ hr_reg_enable(wqe, RCWQE_SO);
|
||||
/* fallthrough */
|
||||
case IBV_WR_SEND_WITH_INV:
|
||||
wqe->inv_key = htole32(wr->invalidate_rkey);
|
||||
@@ -1130,8 +1088,7 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe,
|
||||
break;
|
||||
}
|
||||
|
||||
- roce_set_field(wqe->byte_4, RC_SQ_WQE_BYTE_4_OPCODE_M,
|
||||
- RC_SQ_WQE_BYTE_4_OPCODE_S, to_hr_opcode(wr->opcode));
|
||||
+ hr_reg_write(wqe, RCWQE_OPCODE, to_hr_opcode(wr->opcode));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1143,24 +1100,22 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
int ret;
|
||||
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
|
||||
- !!(wr->send_flags & IBV_SEND_SIGNALED));
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
|
||||
- !!(wr->send_flags & IBV_SEND_FENCE));
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S,
|
||||
- !!(wr->send_flags & IBV_SEND_SOLICITED));
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S,
|
||||
- !!(wr->send_flags & IBV_SEND_INLINE));
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 0);
|
||||
+ hr_reg_write_bool(wqe, RCWQE_CQE,
|
||||
+ !!(wr->send_flags & IBV_SEND_SIGNALED));
|
||||
+ hr_reg_write_bool(wqe, RCWQE_FENCE,
|
||||
+ !!(wr->send_flags & IBV_SEND_FENCE));
|
||||
+ hr_reg_write_bool(wqe, RCWQE_SE,
|
||||
+ !!(wr->send_flags & IBV_SEND_SOLICITED));
|
||||
+ hr_reg_write_bool(wqe, RCWQE_INLINE,
|
||||
+ !!(wr->send_flags & IBV_SEND_INLINE));
|
||||
+ hr_reg_clear(wqe, RCWQE_SO);
|
||||
|
||||
ret = check_rc_opcode(rc_sq_wqe, wr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- roce_set_field(rc_sq_wqe->byte_20,
|
||||
- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
|
||||
- RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S,
|
||||
- sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
|
||||
+ hr_reg_write(rc_sq_wqe, RCWQE_MSG_START_SGE_IDX,
|
||||
+ sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
|
||||
|
||||
if (wr->opcode == IBV_WR_BIND_MW)
|
||||
goto wqe_valid;
|
||||
@@ -1172,8 +1127,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
|
||||
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
|
||||
|
||||
- roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M,
|
||||
- RC_SQ_WQE_BYTE_16_SGE_NUM_S, sge_info->valid_num);
|
||||
+ hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num);
|
||||
|
||||
if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD ||
|
||||
wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
|
||||
@@ -1196,8 +1150,8 @@ wqe_valid:
|
||||
if (qp->flags & HNS_ROCE_QP_CAP_OWNER_DB)
|
||||
udma_to_device_barrier();
|
||||
|
||||
- roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S,
|
||||
- ~((qp->sq.head + nreq) >> qp->sq.shift));
|
||||
+ hr_reg_write_bool(wqe, RCWQE_OWNER,
|
||||
+ !((qp->sq.head + nreq) & BIT(qp->sq.shift)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1243,10 +1197,8 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||
|
||||
switch (ibvqp->qp_type) {
|
||||
case IBV_QPT_XRC_SEND:
|
||||
- roce_set_field(wqe->byte_16,
|
||||
- RC_SQ_WQE_BYTE_16_XRC_SRQN_M,
|
||||
- RC_SQ_WQE_BYTE_16_XRC_SRQN_S,
|
||||
- wr->qp_type.xrc.remote_srqn);
|
||||
+ hr_reg_write(wqe, RCWQE_XRC_SRQN,
|
||||
+ wr->qp_type.xrc.remote_srqn);
|
||||
SWITCH_FALLTHROUGH;
|
||||
case IBV_QPT_RC:
|
||||
ret = set_rc_wqe(wqe, qp, wr, nreq, &sge_info);
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index 014cb8c..4330b7d 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -220,53 +220,44 @@ struct hns_roce_rc_sq_wqe {
|
||||
__le64 va;
|
||||
};
|
||||
|
||||
-#define RC_SQ_WQE_BYTE_4_OPCODE_S 0
|
||||
-#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5
|
||||
-#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13
|
||||
-#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15
|
||||
-#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_OWNER_S 7
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_CQE_S 8
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_FENCE_S 9
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_SO_S 10
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_SE_S 11
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_INLINE_S 12
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_MW_TYPE_S 14
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_ATOMIC_S 20
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_RDMA_READ_S 21
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_4_FLAG_S 31
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0
|
||||
-#define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \
|
||||
- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S)
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_16_SGE_NUM_S 24
|
||||
-#define RC_SQ_WQE_BYTE_16_SGE_NUM_M \
|
||||
- (((1UL << 8) - 1) << RC_SQ_WQE_BYTE_16_SGE_NUM_S)
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
|
||||
-#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M \
|
||||
- (((1UL << 24) - 1) << RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S)
|
||||
-
|
||||
-#define RC_SQ_WQE_BYTE_20_INL_TYPE_S 31
|
||||
+#define RCWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_rc_sq_wqe, h, l)
|
||||
+
|
||||
+#define RCWQE_OPCODE RCWQE_FIELD_LOC(4, 0)
|
||||
+#define RCWQE_DB_SL_L RCWQE_FIELD_LOC(6, 5)
|
||||
+#define RCWQE_SQPN_L RCWQE_FIELD_LOC(6, 5)
|
||||
+#define RCWQE_OWNER RCWQE_FIELD_LOC(7, 7)
|
||||
+#define RCWQE_CQE RCWQE_FIELD_LOC(8, 8)
|
||||
+#define RCWQE_FENCE RCWQE_FIELD_LOC(9, 9)
|
||||
+#define RCWQE_SO RCWQE_FIELD_LOC(10, 10)
|
||||
+#define RCWQE_SE RCWQE_FIELD_LOC(11, 11)
|
||||
+#define RCWQE_INLINE RCWQE_FIELD_LOC(12, 12)
|
||||
+#define RCWQE_DB_SL_H RCWQE_FIELD_LOC(14, 13)
|
||||
+#define RCWQE_WQE_IDX RCWQE_FIELD_LOC(30, 15)
|
||||
+#define RCWQE_SQPN_H RCWQE_FIELD_LOC(30, 13)
|
||||
+#define RCWQE_FLAG RCWQE_FIELD_LOC(31, 31)
|
||||
+#define RCWQE_MSG_LEN RCWQE_FIELD_LOC(63, 32)
|
||||
+#define RCWQE_INV_KEY_IMMTDATA RCWQE_FIELD_LOC(95, 64)
|
||||
+#define RCWQE_XRC_SRQN RCWQE_FIELD_LOC(119, 96)
|
||||
+#define RCWQE_SGE_NUM RCWQE_FIELD_LOC(127, 120)
|
||||
+#define RCWQE_MSG_START_SGE_IDX RCWQE_FIELD_LOC(151, 128)
|
||||
+#define RCWQE_REDUCE_CODE RCWQE_FIELD_LOC(158, 152)
|
||||
+#define RCWQE_INLINE_TYPE RCWQE_FIELD_LOC(159, 159)
|
||||
+#define RCWQE_RKEY RCWQE_FIELD_LOC(191, 160)
|
||||
+#define RCWQE_VA_L RCWQE_FIELD_LOC(223, 192)
|
||||
+#define RCWQE_VA_H RCWQE_FIELD_LOC(255, 224)
|
||||
+#define RCWQE_LEN0 RCWQE_FIELD_LOC(287, 256)
|
||||
+#define RCWQE_LKEY0 RCWQE_FIELD_LOC(319, 288)
|
||||
+#define RCWQE_VA0_L RCWQE_FIELD_LOC(351, 320)
|
||||
+#define RCWQE_VA0_H RCWQE_FIELD_LOC(383, 352)
|
||||
+#define RCWQE_LEN1 RCWQE_FIELD_LOC(415, 384)
|
||||
+#define RCWQE_LKEY1 RCWQE_FIELD_LOC(447, 416)
|
||||
+#define RCWQE_VA1_L RCWQE_FIELD_LOC(479, 448)
|
||||
+#define RCWQE_VA1_H RCWQE_FIELD_LOC(511, 480)
|
||||
+
|
||||
+#define RCWQE_MW_TYPE RCWQE_FIELD_LOC(256, 256)
|
||||
+#define RCWQE_MW_RA_EN RCWQE_FIELD_LOC(258, 258)
|
||||
+#define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
|
||||
+#define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
|
||||
|
||||
struct hns_roce_v2_wqe_data_seg {
|
||||
__le32 len;
|
||||
@@ -323,60 +314,51 @@ struct hns_roce_ud_sq_wqe {
|
||||
uint8_t dgid[HNS_ROCE_GID_SIZE];
|
||||
};
|
||||
|
||||
-#define UD_SQ_WQE_OPCODE_S 0
|
||||
-#define UD_SQ_WQE_OPCODE_M GENMASK(4, 0)
|
||||
-
|
||||
-#define UD_SQ_WQE_OWNER_S 7
|
||||
-
|
||||
-#define UD_SQ_WQE_CQE_S 8
|
||||
-
|
||||
-#define UD_SQ_WQE_SE_S 11
|
||||
-
|
||||
-#define UD_SQ_WQE_PD_S 0
|
||||
-#define UD_SQ_WQE_PD_M GENMASK(23, 0)
|
||||
-
|
||||
-#define UD_SQ_WQE_SGE_NUM_S 24
|
||||
-#define UD_SQ_WQE_SGE_NUM_M GENMASK(31, 24)
|
||||
-
|
||||
-#define UD_SQ_WQE_MSG_START_SGE_IDX_S 0
|
||||
-#define UD_SQ_WQE_MSG_START_SGE_IDX_M GENMASK(23, 0)
|
||||
-
|
||||
-#define UD_SQ_WQE_UDP_SPN_S 16
|
||||
-#define UD_SQ_WQE_UDP_SPN_M GENMASK(31, 16)
|
||||
-
|
||||
-#define UD_SQ_WQE_DQPN_S 0
|
||||
-#define UD_SQ_WQE_DQPN_M GENMASK(23, 0)
|
||||
-
|
||||
-#define UD_SQ_WQE_VLAN_S 0
|
||||
-#define UD_SQ_WQE_VLAN_M GENMASK(15, 0)
|
||||
-
|
||||
-#define UD_SQ_WQE_HOPLIMIT_S 16
|
||||
-#define UD_SQ_WQE_HOPLIMIT_M GENMASK(23, 16)
|
||||
-
|
||||
-#define UD_SQ_WQE_TCLASS_S 24
|
||||
-#define UD_SQ_WQE_TCLASS_M GENMASK(31, 24)
|
||||
-
|
||||
-#define UD_SQ_WQE_FLOW_LABEL_S 0
|
||||
-#define UD_SQ_WQE_FLOW_LABEL_M GENMASK(19, 0)
|
||||
-
|
||||
-#define UD_SQ_WQE_SL_S 20
|
||||
-#define UD_SQ_WQE_SL_M GENMASK(23, 20)
|
||||
-
|
||||
-#define UD_SQ_WQE_VLAN_EN_S 30
|
||||
-
|
||||
-#define UD_SQ_WQE_LBI_S 31
|
||||
-
|
||||
-#define UD_SQ_WQE_BYTE_4_INL_S 12
|
||||
-#define UD_SQ_WQE_BYTE_20_INL_TYPE_S 31
|
||||
-
|
||||
-#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_S 16
|
||||
-#define UD_SQ_WQE_BYTE_8_INL_DATE_15_0_M GENMASK(31, 16)
|
||||
-#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_S 24
|
||||
-#define UD_SQ_WQE_BYTE_16_INL_DATA_23_16_M GENMASK(31, 24)
|
||||
-#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_S 0
|
||||
-#define UD_SQ_WQE_BYTE_20_INL_DATA_47_24_M GENMASK(23, 0)
|
||||
-#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_S 0
|
||||
-#define UD_SQ_WQE_BYTE_24_INL_DATA_63_48_M GENMASK(15, 0)
|
||||
+#define UDWQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ud_sq_wqe, h, l)
|
||||
+
|
||||
+#define UDWQE_OPCODE UDWQE_FIELD_LOC(4, 0)
|
||||
+#define UDWQE_DB_SL_L UDWQE_FIELD_LOC(6, 5)
|
||||
+#define UDWQE_OWNER UDWQE_FIELD_LOC(7, 7)
|
||||
+#define UDWQE_CQE UDWQE_FIELD_LOC(8, 8)
|
||||
+#define UDWQE_RSVD1 UDWQE_FIELD_LOC(10, 9)
|
||||
+#define UDWQE_SE UDWQE_FIELD_LOC(11, 11)
|
||||
+#define UDWQE_INLINE UDWQE_FIELD_LOC(12, 12)
|
||||
+#define UDWQE_DB_SL_H UDWQE_FIELD_LOC(14, 13)
|
||||
+#define UDWQE_WQE_IDX UDWQE_FIELD_LOC(30, 15)
|
||||
+#define UDWQE_FLAG UDWQE_FIELD_LOC(31, 31)
|
||||
+#define UDWQE_MSG_LEN UDWQE_FIELD_LOC(63, 32)
|
||||
+#define UDWQE_IMMTDATA UDWQE_FIELD_LOC(95, 64)
|
||||
+#define UDWQE_PD UDWQE_FIELD_LOC(119, 96)
|
||||
+#define UDWQE_SGE_NUM UDWQE_FIELD_LOC(127, 120)
|
||||
+#define UDWQE_MSG_START_SGE_IDX UDWQE_FIELD_LOC(151, 128)
|
||||
+#define UDWQE_RSVD3 UDWQE_FIELD_LOC(158, 152)
|
||||
+#define UDWQE_INLINE_TYPE UDWQE_FIELD_LOC(159, 159)
|
||||
+#define UDWQE_RSVD4 UDWQE_FIELD_LOC(175, 160)
|
||||
+#define UDWQE_UDPSPN UDWQE_FIELD_LOC(191, 176)
|
||||
+#define UDWQE_QKEY UDWQE_FIELD_LOC(223, 192)
|
||||
+#define UDWQE_DQPN UDWQE_FIELD_LOC(247, 224)
|
||||
+#define UDWQE_RSVD5 UDWQE_FIELD_LOC(255, 248)
|
||||
+#define UDWQE_VLAN UDWQE_FIELD_LOC(271, 256)
|
||||
+#define UDWQE_HOPLIMIT UDWQE_FIELD_LOC(279, 272)
|
||||
+#define UDWQE_TCLASS UDWQE_FIELD_LOC(287, 280)
|
||||
+#define UDWQE_FLOW_LABEL UDWQE_FIELD_LOC(307, 288)
|
||||
+#define UDWQE_SL UDWQE_FIELD_LOC(311, 308)
|
||||
+#define UDWQE_PORTN UDWQE_FIELD_LOC(314, 312)
|
||||
+#define UDWQE_RSVD6 UDWQE_FIELD_LOC(317, 315)
|
||||
+#define UDWQE_UD_VLAN_EN UDWQE_FIELD_LOC(318, 318)
|
||||
+#define UDWQE_LBI UDWQE_FIELD_LOC(319, 319)
|
||||
+#define UDWQE_DMAC_L UDWQE_FIELD_LOC(351, 320)
|
||||
+#define UDWQE_DMAC_H UDWQE_FIELD_LOC(367, 352)
|
||||
+#define UDWQE_GMV_IDX UDWQE_FIELD_LOC(383, 368)
|
||||
+#define UDWQE_DGID0 UDWQE_FIELD_LOC(415, 384)
|
||||
+#define UDWQE_DGID1 UDWQE_FIELD_LOC(447, 416)
|
||||
+#define UDWQE_DGID2 UDWQE_FIELD_LOC(479, 448)
|
||||
+#define UDWQE_DGID3 UDWQE_FIELD_LOC(511, 480)
|
||||
+
|
||||
+#define UDWQE_INLINE_DATA_15_0 UDWQE_FIELD_LOC(63, 48)
|
||||
+#define UDWQE_INLINE_DATA_23_16 UDWQE_FIELD_LOC(127, 120)
|
||||
+#define UDWQE_INLINE_DATA_47_24 UDWQE_FIELD_LOC(151, 128)
|
||||
+#define UDWQE_INLINE_DATA_63_48 UDWQE_FIELD_LOC(175, 160)
|
||||
|
||||
#define MAX_SERVICE_LEVEL 0x7
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,186 +0,0 @@
|
||||
From 49263de90f77f218710ef45bc0377d3e2019d811 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Sat, 25 Dec 2021 17:42:54 +0800
|
||||
Subject: libhns: Use new interfaces hr reg ***() to operate the DB field
|
||||
|
||||
Use hr_reg_xxx() to simplify the code for filling fields.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 59 ++++++++++++++------------------
|
||||
providers/hns/hns_roce_u_hw_v2.h | 30 ++++++----------
|
||||
2 files changed, 35 insertions(+), 54 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 0cff12b..e7dec0b 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -291,10 +291,9 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
|
||||
{
|
||||
struct hns_roce_db rq_db = {};
|
||||
|
||||
- rq_db.byte_4 = htole32(qpn);
|
||||
- roce_set_field(rq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
|
||||
- HNS_ROCE_V2_RQ_DB);
|
||||
- rq_db.parameter = htole32(rq_head);
|
||||
+ hr_reg_write(&rq_db, DB_TAG, qpn);
|
||||
+ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
|
||||
+ hr_reg_write(&rq_db, DB_PI, rq_head);
|
||||
|
||||
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
|
||||
}
|
||||
@@ -304,12 +303,11 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
{
|
||||
struct hns_roce_db sq_db = {};
|
||||
|
||||
- sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num);
|
||||
- roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
|
||||
- HNS_ROCE_V2_SQ_DB);
|
||||
+ hr_reg_write(&sq_db, DB_TAG, qp->verbs_qp.qp.qp_num);
|
||||
+ hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB);
|
||||
+ hr_reg_write(&sq_db, DB_PI, qp->sq.head);
|
||||
+ hr_reg_write(&sq_db, DB_SL, qp->sl);
|
||||
|
||||
- sq_db.parameter = htole32(qp->sq.head);
|
||||
- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
|
||||
hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
@@ -336,14 +334,10 @@ static void update_cq_db(struct hns_roce_context *ctx,
|
||||
{
|
||||
struct hns_roce_db cq_db = {};
|
||||
|
||||
- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn);
|
||||
- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
|
||||
- HNS_ROCE_V2_CQ_DB_PTR);
|
||||
-
|
||||
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M,
|
||||
- DB_PARAM_CQ_CONSUMER_IDX_S, cq->cons_index);
|
||||
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M,
|
||||
- DB_PARAM_CQ_CMD_SN_S, 1);
|
||||
+ hr_reg_write(&cq_db, DB_TAG, cq->cqn);
|
||||
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_PTR);
|
||||
+ hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index);
|
||||
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
|
||||
|
||||
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
|
||||
}
|
||||
@@ -663,7 +657,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
|
||||
if (npolled || err == V2_CQ_POLL_ERR) {
|
||||
if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
|
||||
- *cq->db = cq->cons_index & DB_PARAM_CQ_CONSUMER_IDX_M;
|
||||
+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
|
||||
else
|
||||
update_cq_db(ctx, cq);
|
||||
}
|
||||
@@ -679,24 +673,17 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
|
||||
struct hns_roce_db cq_db = {};
|
||||
uint32_t solicited_flag;
|
||||
- uint32_t cmd_sn;
|
||||
uint32_t ci;
|
||||
|
||||
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
|
||||
- cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK;
|
||||
solicited_flag = solicited ? HNS_ROCE_V2_CQ_DB_REQ_SOL :
|
||||
HNS_ROCE_V2_CQ_DB_REQ_NEXT;
|
||||
|
||||
- roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn);
|
||||
- roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
|
||||
- HNS_ROCE_V2_CQ_DB_NTR);
|
||||
-
|
||||
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CONSUMER_IDX_M,
|
||||
- DB_PARAM_CQ_CONSUMER_IDX_S, ci);
|
||||
-
|
||||
- roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M,
|
||||
- DB_PARAM_CQ_CMD_SN_S, cmd_sn);
|
||||
- roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag);
|
||||
+ hr_reg_write(&cq_db, DB_TAG, cq->cqn);
|
||||
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_NTR);
|
||||
+ hr_reg_write(&cq_db, DB_CQ_CI, ci);
|
||||
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn);
|
||||
+ hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag);
|
||||
|
||||
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
|
||||
|
||||
@@ -1626,6 +1613,13 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
|
||||
idx_que->head++;
|
||||
}
|
||||
|
||||
+static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ hr_reg_write(db, DB_TAG, srq->srqn);
|
||||
+ hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
|
||||
+ hr_reg_write(db, DB_PI, srq->idx_que.head);
|
||||
+}
|
||||
+
|
||||
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
struct ibv_recv_wr *wr,
|
||||
struct ibv_recv_wr **bad_wr)
|
||||
@@ -1665,10 +1659,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
*/
|
||||
udma_to_device_barrier();
|
||||
|
||||
- srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S |
|
||||
- srq->srqn);
|
||||
- srq_db.parameter = htole32(srq->idx_que.head &
|
||||
- DB_PARAM_SRQ_PRODUCER_COUNTER_M);
|
||||
+ update_srq_db(&srq_db, srq);
|
||||
|
||||
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
|
||||
(__le32 *)&srq_db);
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index 4330b7d..e91b1f7 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -41,8 +41,6 @@ enum {
|
||||
#define HNS_ROCE_V2_CQ_DB_REQ_SOL 1
|
||||
#define HNS_ROCE_V2_CQ_DB_REQ_NEXT 0
|
||||
|
||||
-#define HNS_ROCE_CMDSN_MASK 0x3
|
||||
-
|
||||
#define HNS_ROCE_SL_SHIFT 2
|
||||
|
||||
/* V2 REG DEFINITION */
|
||||
@@ -127,27 +125,19 @@ struct hns_roce_db {
|
||||
__le32 byte_4;
|
||||
__le32 parameter;
|
||||
};
|
||||
-#define DB_BYTE_4_TAG_S 0
|
||||
-#define DB_BYTE_4_TAG_M GENMASK(23, 0)
|
||||
-
|
||||
-#define DB_BYTE_4_CMD_S 24
|
||||
-#define DB_BYTE_4_CMD_M GENMASK(27, 24)
|
||||
-
|
||||
-#define DB_BYTE_4_FLAG_S 31
|
||||
-
|
||||
-#define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0
|
||||
-#define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0)
|
||||
-
|
||||
-#define DB_PARAM_SL_S 16
|
||||
-#define DB_PARAM_SL_M GENMASK(18, 16)
|
||||
|
||||
-#define DB_PARAM_CQ_CONSUMER_IDX_S 0
|
||||
-#define DB_PARAM_CQ_CONSUMER_IDX_M GENMASK(23, 0)
|
||||
+#define DB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_db, h, l)
|
||||
|
||||
-#define DB_PARAM_CQ_NOTIFY_S 24
|
||||
+#define DB_TAG DB_FIELD_LOC(23, 0)
|
||||
+#define DB_CMD DB_FIELD_LOC(27, 24)
|
||||
+#define DB_FLAG DB_FIELD_LOC(31, 31)
|
||||
+#define DB_PI DB_FIELD_LOC(47, 32)
|
||||
+#define DB_SL DB_FIELD_LOC(50, 48)
|
||||
+#define DB_CQ_CI DB_FIELD_LOC(55, 32)
|
||||
+#define DB_CQ_NOTIFY DB_FIELD_LOC(56, 56)
|
||||
+#define DB_CQ_CMD_SN DB_FIELD_LOC(58, 57)
|
||||
|
||||
-#define DB_PARAM_CQ_CMD_SN_S 25
|
||||
-#define DB_PARAM_CQ_CMD_SN_M GENMASK(26, 25)
|
||||
+#define RECORD_DB_CI_MASK GENMASK(23, 0)
|
||||
|
||||
struct hns_roce_v2_cqe {
|
||||
__le32 byte_4;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,306 +0,0 @@
|
||||
From 2da2a94f0ef5b6cf7fb8eacee1814a418d9bde74 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Sat, 25 Dec 2021 17:42:53 +0800
|
||||
Subject: libhns: Add new interfaces hr reg ***() to operate the CQE field
|
||||
|
||||
Implement hr_reg_xxx() to simplify the code for filling or extracting
|
||||
fields.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 53 +++++++++++++++++++++++++
|
||||
providers/hns/hns_roce_u_hw_v2.c | 58 ++++++++++------------------
|
||||
providers/hns/hns_roce_u_hw_v2.h | 66 ++++++++++++--------------------
|
||||
3 files changed, 98 insertions(+), 79 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index c1ae1c9..df7f485 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -101,6 +101,59 @@
|
||||
#define roce_set_bit(origin, shift, val) \
|
||||
roce_set_field((origin), (1ul << (shift)), (shift), (val))
|
||||
|
||||
+#define FIELD_LOC(field_type, field_h, field_l) \
|
||||
+ field_type, field_h, \
|
||||
+ field_l + BUILD_ASSERT_OR_ZERO(((field_h) / 32) == \
|
||||
+ ((field_l) / 32))
|
||||
+
|
||||
+#define _hr_reg_enable(ptr, field_type, field_h, field_l) \
|
||||
+ ({ \
|
||||
+ const field_type *_ptr = ptr; \
|
||||
+ BUILD_ASSERT((field_h) == (field_l)); \
|
||||
+ *((__le32 *)_ptr + (field_h) / 32) |= \
|
||||
+ htole32(BIT((field_l) % 32)); \
|
||||
+ })
|
||||
+
|
||||
+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
|
||||
+
|
||||
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \
|
||||
+ ({ \
|
||||
+ const field_type *_ptr = ptr; \
|
||||
+ BUILD_ASSERT((field_h) >= (field_l)); \
|
||||
+ *((__le32 *)_ptr + (field_h) / 32) &= \
|
||||
+ ~htole32(GENMASK((field_h) % 32, (field_l) % 32)); \
|
||||
+ })
|
||||
+
|
||||
+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field)
|
||||
+
|
||||
+#define _hr_reg_write_bool(ptr, field_type, field_h, field_l, val) \
|
||||
+ ({ \
|
||||
+ (val) ? _hr_reg_enable(ptr, field_type, field_h, field_l) : \
|
||||
+ _hr_reg_clear(ptr, field_type, field_h, field_l);\
|
||||
+ })
|
||||
+
|
||||
+#define hr_reg_write_bool(ptr, field, val) _hr_reg_write_bool(ptr, field, val)
|
||||
+
|
||||
+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \
|
||||
+ ({ \
|
||||
+ const uint32_t _val = val; \
|
||||
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
|
||||
+ *((__le32 *)ptr + (field_h) / 32) |= htole32(FIELD_PREP( \
|
||||
+ GENMASK((field_h) % 32, (field_l) % 32), _val)); \
|
||||
+ })
|
||||
+
|
||||
+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val)
|
||||
+
|
||||
+#define _hr_reg_read(ptr, field_type, field_h, field_l) \
|
||||
+ ({ \
|
||||
+ const field_type *_ptr = ptr; \
|
||||
+ BUILD_ASSERT((field_h) >= (field_l)); \
|
||||
+ FIELD_GET(GENMASK((field_h) % 32, (field_l) % 32), \
|
||||
+ le32toh(*((__le32 *)_ptr + (field_h) / 32))); \
|
||||
+ })
|
||||
+
|
||||
+#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
|
||||
+
|
||||
enum {
|
||||
HNS_ROCE_QP_TABLE_BITS = 8,
|
||||
HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS,
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index e7dec0b..558457a 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -187,8 +187,7 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
}
|
||||
}
|
||||
|
||||
- wc->vendor_err = roce_get_field(cqe->byte_16, CQE_BYTE_16_SUB_STATUS_M,
|
||||
- CQE_BYTE_16_SUB_STATUS_S);
|
||||
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
|
||||
}
|
||||
|
||||
static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
|
||||
@@ -200,8 +199,8 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n)
|
||||
{
|
||||
struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe);
|
||||
|
||||
- return (!!(roce_get_bit(cqe->byte_4, CQE_BYTE_4_OWNER_S)) ^
|
||||
- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL;
|
||||
+ return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ?
|
||||
+ cqe : NULL;
|
||||
}
|
||||
|
||||
static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
|
||||
@@ -257,8 +256,7 @@ static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
|
||||
uint32_t srqn;
|
||||
|
||||
if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) {
|
||||
- srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M,
|
||||
- CQE_BYTE_12_XRC_SRQN_S);
|
||||
+ srqn = hr_reg_read(cqe, CQE_XRC_SRQN);
|
||||
|
||||
*srq = hns_roce_find_srq(ctx, srqn);
|
||||
if (!*srq)
|
||||
@@ -438,15 +436,13 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
(opcode == HNS_ROCE_RECV_OP_SEND ||
|
||||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM ||
|
||||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) &&
|
||||
- (roce_get_bit(cqe->byte_4, CQE_BYTE_4_RQ_INLINE_S))) {
|
||||
+ hr_reg_read(cqe, CQE_RQ_INLINE)) {
|
||||
struct hns_roce_rinl_sge *sge_list;
|
||||
uint32_t wr_num, wr_cnt, sge_num, data_len;
|
||||
uint8_t *wqe_buf;
|
||||
uint32_t sge_cnt, size;
|
||||
|
||||
- wr_num = (uint16_t)roce_get_field(cqe->byte_4,
|
||||
- CQE_BYTE_4_WQE_IDX_M,
|
||||
- CQE_BYTE_4_WQE_IDX_S) & 0xffff;
|
||||
+ wr_num = hr_reg_read(cqe, CQE_WQE_IDX);
|
||||
wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
|
||||
|
||||
sge_list = (*cur_qp)->rq_rinl_buf.wqe_list[wr_cnt].sg_list;
|
||||
@@ -477,13 +473,10 @@ static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
|
||||
static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc)
|
||||
{
|
||||
- wc->sl = roce_get_field(cqe->byte_32, CQE_BYTE_32_SL_M,
|
||||
- CQE_BYTE_32_SL_S);
|
||||
- wc->src_qp = roce_get_field(cqe->byte_32, CQE_BYTE_32_RMT_QPN_M,
|
||||
- CQE_BYTE_32_RMT_QPN_S);
|
||||
+ wc->sl = hr_reg_read(cqe, CQE_SL);
|
||||
+ wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN);
|
||||
wc->slid = 0;
|
||||
- wc->wc_flags |= roce_get_bit(cqe->byte_32, CQE_BYTE_32_GRH_S) ?
|
||||
- IBV_WC_GRH : 0;
|
||||
+ wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IBV_WC_GRH : 0;
|
||||
wc->pkey_index = 0;
|
||||
}
|
||||
|
||||
@@ -492,8 +485,7 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
{
|
||||
uint32_t wqe_idx;
|
||||
|
||||
- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
|
||||
- CQE_BYTE_4_WQE_IDX_S);
|
||||
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
|
||||
wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
|
||||
hns_roce_free_srq_wqe(srq, wqe_idx);
|
||||
}
|
||||
@@ -533,8 +525,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
* according to the wqe idx in the current cqe first
|
||||
*/
|
||||
if (hr_qp->sq_signal_bits) {
|
||||
- wqe_idx = roce_get_field(cqe->byte_4, CQE_BYTE_4_WQE_IDX_M,
|
||||
- CQE_BYTE_4_WQE_IDX_S);
|
||||
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
|
||||
/* get the processed wqes num since last signalling */
|
||||
wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
|
||||
}
|
||||
@@ -590,8 +581,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
|
||||
udma_from_device_barrier();
|
||||
|
||||
- qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M,
|
||||
- CQE_BYTE_16_LCL_QPN_S);
|
||||
+ qpn = hr_reg_read(cqe, CQE_LCL_QPN);
|
||||
|
||||
/* if cur qp is null, then could not get the correct qpn */
|
||||
if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) {
|
||||
@@ -600,11 +590,9 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
return V2_CQ_POLL_ERR;
|
||||
}
|
||||
|
||||
- status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
|
||||
- CQE_BYTE_4_STATUS_S);
|
||||
- opcode = roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
|
||||
- CQE_BYTE_4_OPCODE_S);
|
||||
- is_send = roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) == CQE_FOR_SQ;
|
||||
+ status = hr_reg_read(cqe, CQE_STATUS);
|
||||
+ opcode = hr_reg_read(cqe, CQE_OPCODE);
|
||||
+ is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ;
|
||||
if (is_send) {
|
||||
parse_cqe_for_req(cqe, wc, *cur_qp, opcode);
|
||||
} else {
|
||||
@@ -1350,26 +1338,20 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||
|
||||
while ((int) --prod_index - (int) cq->cons_index >= 0) {
|
||||
cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe);
|
||||
- if (roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M,
|
||||
- CQE_BYTE_16_LCL_QPN_S) == qpn) {
|
||||
- is_recv_cqe = roce_get_bit(cqe->byte_4,
|
||||
- CQE_BYTE_4_S_R_S);
|
||||
+ if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
|
||||
+ is_recv_cqe = hr_reg_read(cqe, CQE_S_R);
|
||||
|
||||
if (srq && is_recv_cqe) {
|
||||
- wqe_index = roce_get_field(cqe->byte_4,
|
||||
- CQE_BYTE_4_WQE_IDX_M,
|
||||
- CQE_BYTE_4_WQE_IDX_S);
|
||||
+ wqe_index = hr_reg_read(cqe, CQE_WQE_IDX);
|
||||
hns_roce_free_srq_wqe(srq, wqe_index);
|
||||
}
|
||||
++nfreed;
|
||||
} else if (nfreed) {
|
||||
dest = get_cqe_v2(cq,
|
||||
(prod_index + nfreed) & cq->ibv_cq.cqe);
|
||||
- owner_bit = roce_get_bit(dest->byte_4,
|
||||
- CQE_BYTE_4_OWNER_S);
|
||||
+ owner_bit = hr_reg_read(dest, CQE_OWNER);
|
||||
memcpy(dest, cqe, cq->cqe_size);
|
||||
- roce_set_bit(dest->byte_4, CQE_BYTE_4_OWNER_S,
|
||||
- owner_bit);
|
||||
+ hr_reg_write_bool(dest, CQE_OWNER, owner_bit);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index e91b1f7..92e5f1a 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -154,47 +154,31 @@ struct hns_roce_v2_cqe {
|
||||
__le32 rsv[8];
|
||||
};
|
||||
|
||||
-#define CQE_BYTE_4_OPCODE_S 0
|
||||
-#define CQE_BYTE_4_OPCODE_M (((1UL << 5) - 1) << CQE_BYTE_4_OPCODE_S)
|
||||
-
|
||||
-#define CQE_BYTE_4_RQ_INLINE_S 5
|
||||
-
|
||||
-#define CQE_BYTE_4_S_R_S 6
|
||||
-#define CQE_BYTE_4_OWNER_S 7
|
||||
-
|
||||
-#define CQE_BYTE_4_STATUS_S 8
|
||||
-#define CQE_BYTE_4_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_4_STATUS_S)
|
||||
-
|
||||
-#define CQE_BYTE_4_WQE_IDX_S 16
|
||||
-#define CQE_BYTE_4_WQE_IDX_M (((1UL << 16) - 1) << CQE_BYTE_4_WQE_IDX_S)
|
||||
-
|
||||
-#define CQE_BYTE_12_XRC_SRQN_S 0
|
||||
-#define CQE_BYTE_12_XRC_SRQN_M (((1UL << 24) - 1) << CQE_BYTE_12_XRC_SRQN_S)
|
||||
-
|
||||
-#define CQE_BYTE_16_LCL_QPN_S 0
|
||||
-#define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S)
|
||||
-
|
||||
-#define CQE_BYTE_16_SUB_STATUS_S 24
|
||||
-#define CQE_BYTE_16_SUB_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_16_SUB_STATUS_S)
|
||||
-
|
||||
-#define CQE_BYTE_28_SMAC_S 0
|
||||
-#define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S)
|
||||
-
|
||||
-#define CQE_BYTE_28_PORT_TYPE_S 16
|
||||
-#define CQE_BYTE_28_PORT_TYPE_M (((1UL << 2) - 1) << CQE_BYTE_28_PORT_TYPE_S)
|
||||
-
|
||||
-#define CQE_BYTE_32_RMT_QPN_S 0
|
||||
-#define CQE_BYTE_32_RMT_QPN_M (((1UL << 24) - 1) << CQE_BYTE_32_RMT_QPN_S)
|
||||
-
|
||||
-#define CQE_BYTE_32_SL_S 24
|
||||
-#define CQE_BYTE_32_SL_M (((1UL << 3) - 1) << CQE_BYTE_32_SL_S)
|
||||
-
|
||||
-#define CQE_BYTE_32_PORTN_S 27
|
||||
-#define CQE_BYTE_32_PORTN_M (((1UL << 3) - 1) << CQE_BYTE_32_PORTN_S)
|
||||
-
|
||||
-#define CQE_BYTE_32_GRH_S 30
|
||||
-
|
||||
-#define CQE_BYTE_32_LPK_S 31
|
||||
+#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l)
|
||||
+
|
||||
+#define CQE_OPCODE CQE_FIELD_LOC(4, 0)
|
||||
+#define CQE_RQ_INLINE CQE_FIELD_LOC(5, 5)
|
||||
+#define CQE_S_R CQE_FIELD_LOC(6, 6)
|
||||
+#define CQE_OWNER CQE_FIELD_LOC(7, 7)
|
||||
+#define CQE_STATUS CQE_FIELD_LOC(15, 8)
|
||||
+#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16)
|
||||
+#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32)
|
||||
+#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64)
|
||||
+#define CQE_RSV0 CQE_FIELD_LOC(95, 88)
|
||||
+#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96)
|
||||
+#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120)
|
||||
+#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128)
|
||||
+#define CQE_SMAC CQE_FIELD_LOC(207, 160)
|
||||
+#define CQE_PORT_TYPE CQE_FIELD_LOC(209, 208)
|
||||
+#define CQE_VID CQE_FIELD_LOC(221, 210)
|
||||
+#define CQE_VID_VLD CQE_FIELD_LOC(222, 222)
|
||||
+#define CQE_RSV2 CQE_FIELD_LOC(223, 223)
|
||||
+#define CQE_RMT_QPN CQE_FIELD_LOC(247, 224)
|
||||
+#define CQE_SL CQE_FIELD_LOC(250, 248)
|
||||
+#define CQE_PORTN CQE_FIELD_LOC(253, 251)
|
||||
+#define CQE_GRH CQE_FIELD_LOC(254, 254)
|
||||
+#define CQE_LPK CQE_FIELD_LOC(255, 255)
|
||||
+#define CQE_RSV3 CQE_FIELD_LOC(511, 256)
|
||||
|
||||
struct hns_roce_rc_sq_wqe {
|
||||
__le32 byte_4;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,202 +0,0 @@
|
||||
From 48e8ca01b1e5d033fca6e988d2d280846c95d7e1 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Fri, 31 Dec 2021 18:01:06 +0800
|
||||
Subject: libhns: Fix the calculation of QP/SRQ table size
|
||||
|
||||
The table_size means the maximum number of QP/SRQ. This value may not be
|
||||
a power of two. The old algorithm will lead to a result that allocates a
|
||||
mismatched table.
|
||||
|
||||
Fixes: 887b78c80224 ("libhns: Add initial main frame")
|
||||
Fixes: 9e3df7578153 ("libhns: Support ibv_create_srq_ex")
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.c | 18 +++++++++++-------
|
||||
providers/hns/hns_roce_u.h | 20 ++++++++++++++------
|
||||
providers/hns/hns_roce_u_hw_v1.c | 4 ++--
|
||||
providers/hns/hns_roce_u_hw_v2.c | 4 ++--
|
||||
providers/hns/hns_roce_u_verbs.c | 9 ++++-----
|
||||
5 files changed, 33 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
|
||||
index 9dc4905..6eac4ff 100644
|
||||
--- a/providers/hns/hns_roce_u.c
|
||||
+++ b/providers/hns/hns_roce_u.c
|
||||
@@ -92,6 +92,13 @@ static const struct verbs_context_ops hns_common_ops = {
|
||||
.get_srq_num = hns_roce_u_get_srq_num,
|
||||
};
|
||||
|
||||
+static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
|
||||
+{
|
||||
+ uint32_t count_shift = hr_ilog32(entry_count);
|
||||
+
|
||||
+ return count_shift > size_shift ? count_shift - size_shift : 0;
|
||||
+}
|
||||
+
|
||||
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
|
||||
int cmd_fd,
|
||||
void *private_data)
|
||||
@@ -120,18 +127,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
|
||||
else
|
||||
context->cqe_size = HNS_ROCE_V3_CQE_SIZE;
|
||||
|
||||
- context->num_qps = resp.qp_tab_size;
|
||||
- context->num_srqs = resp.srq_tab_size;
|
||||
-
|
||||
- context->qp_table_shift = ffs(context->num_qps) - 1 -
|
||||
- HNS_ROCE_QP_TABLE_BITS;
|
||||
+ context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
|
||||
+ HNS_ROCE_QP_TABLE_BITS);
|
||||
context->qp_table_mask = (1 << context->qp_table_shift) - 1;
|
||||
pthread_mutex_init(&context->qp_table_mutex, NULL);
|
||||
for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i)
|
||||
context->qp_table[i].refcnt = 0;
|
||||
|
||||
- context->srq_table_shift = ffs(context->num_srqs) - 1 -
|
||||
- HNS_ROCE_SRQ_TABLE_BITS;
|
||||
+ context->srq_table_shift = calc_table_shift(resp.srq_tab_size,
|
||||
+ HNS_ROCE_SRQ_TABLE_BITS);
|
||||
context->srq_table_mask = (1 << context->srq_table_shift) - 1;
|
||||
pthread_mutex_init(&context->srq_table_mutex, NULL);
|
||||
for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index df7f485..9366923 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -154,10 +154,8 @@
|
||||
|
||||
#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
|
||||
|
||||
-enum {
|
||||
- HNS_ROCE_QP_TABLE_BITS = 8,
|
||||
- HNS_ROCE_QP_TABLE_SIZE = 1 << HNS_ROCE_QP_TABLE_BITS,
|
||||
-};
|
||||
+#define HNS_ROCE_QP_TABLE_BITS 8
|
||||
+#define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
|
||||
|
||||
#define HNS_ROCE_SRQ_TABLE_BITS 8
|
||||
#define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS)
|
||||
@@ -211,7 +209,6 @@ struct hns_roce_context {
|
||||
int refcnt;
|
||||
} qp_table[HNS_ROCE_QP_TABLE_SIZE];
|
||||
pthread_mutex_t qp_table_mutex;
|
||||
- uint32_t num_qps;
|
||||
uint32_t qp_table_shift;
|
||||
uint32_t qp_table_mask;
|
||||
|
||||
@@ -220,7 +217,6 @@ struct hns_roce_context {
|
||||
int refcnt;
|
||||
} srq_table[HNS_ROCE_SRQ_TABLE_SIZE];
|
||||
pthread_mutex_t srq_table_mutex;
|
||||
- uint32_t num_srqs;
|
||||
uint32_t srq_table_shift;
|
||||
uint32_t srq_table_mask;
|
||||
|
||||
@@ -382,6 +378,18 @@ static inline unsigned int hr_ilog32(unsigned int count)
|
||||
return ilog32(count - 1);
|
||||
}
|
||||
|
||||
+static inline uint32_t to_hr_qp_table_index(uint32_t qpn,
|
||||
+ struct hns_roce_context *ctx)
|
||||
+{
|
||||
+ return (qpn >> ctx->qp_table_shift) & (HNS_ROCE_QP_TABLE_SIZE - 1);
|
||||
+}
|
||||
+
|
||||
+static inline uint32_t to_hr_srq_table_index(uint32_t srqn,
|
||||
+ struct hns_roce_context *ctx)
|
||||
+{
|
||||
+ return (srqn >> ctx->srq_table_shift) & (HNS_ROCE_SRQ_TABLE_SIZE - 1);
|
||||
+}
|
||||
+
|
||||
static inline struct hns_roce_device *to_hr_dev(struct ibv_device *ibv_dev)
|
||||
{
|
||||
return container_of(ibv_dev, struct hns_roce_device, ibv_dev.device);
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 838e004..28ad482 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -220,7 +220,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
|
||||
static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
|
||||
uint32_t qpn)
|
||||
{
|
||||
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
|
||||
|
||||
if (ctx->qp_table[tind].refcnt) {
|
||||
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
|
||||
@@ -232,7 +232,7 @@ static struct hns_roce_qp *hns_roce_find_qp(struct hns_roce_context *ctx,
|
||||
|
||||
static void hns_roce_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
|
||||
{
|
||||
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
|
||||
|
||||
if (!--ctx->qp_table[tind].refcnt)
|
||||
free(ctx->qp_table[tind].table);
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 558457a..e39ee7f 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -343,7 +343,7 @@ static void update_cq_db(struct hns_roce_context *ctx,
|
||||
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
|
||||
uint32_t qpn)
|
||||
{
|
||||
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
|
||||
|
||||
if (ctx->qp_table[tind].refcnt)
|
||||
return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
|
||||
@@ -354,7 +354,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
|
||||
void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
|
||||
{
|
||||
uint32_t qpn = qp->verbs_qp.qp.qp_num;
|
||||
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
|
||||
|
||||
pthread_mutex_lock(&ctx->qp_table_mutex);
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 557d075..5ccb701 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -431,8 +431,7 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq)
|
||||
static int hns_roce_store_srq(struct hns_roce_context *ctx,
|
||||
struct hns_roce_srq *srq)
|
||||
{
|
||||
- uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >>
|
||||
- ctx->srq_table_shift;
|
||||
+ uint32_t tind = to_hr_srq_table_index(srq->srqn, ctx);
|
||||
|
||||
pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
|
||||
@@ -457,7 +456,7 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx,
|
||||
struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx,
|
||||
uint32_t srqn)
|
||||
{
|
||||
- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
|
||||
+ uint32_t tind = to_hr_srq_table_index(srqn, ctx);
|
||||
|
||||
if (ctx->srq_table[tind].refcnt)
|
||||
return ctx->srq_table[tind].table[srqn & ctx->srq_table_mask];
|
||||
@@ -467,7 +466,7 @@ struct hns_roce_srq *hns_roce_find_srq(struct hns_roce_context *ctx,
|
||||
|
||||
static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn)
|
||||
{
|
||||
- uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
|
||||
+ uint32_t tind = to_hr_srq_table_index(srqn, ctx);
|
||||
|
||||
pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
|
||||
@@ -1108,7 +1107,7 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
|
||||
struct hns_roce_qp *qp)
|
||||
{
|
||||
uint32_t qpn = qp->verbs_qp.qp.qp_num;
|
||||
- uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
+ uint32_t tind = to_hr_qp_table_index(qpn, ctx);
|
||||
|
||||
pthread_mutex_lock(&ctx->qp_table_mutex);
|
||||
if (!ctx->qp_table[tind].refcnt) {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,31 +0,0 @@
|
||||
From d4766cd11b985f7f798410129a0b204beb13ecef Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Mon, 17 Jan 2022 20:43:39 +0800
|
||||
Subject: libhns: Fix wrong HIP08 version macro
|
||||
|
||||
The version macro of HIP08 should be consistent with the version number
|
||||
queried from the hardware.
|
||||
|
||||
Fixes: b8cb140e9cd6 ("libhns: Refresh version info before using it")
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 9366923..2b4ba18 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -48,8 +48,7 @@
|
||||
#include "hns_roce_u_abi.h"
|
||||
|
||||
#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
|
||||
-
|
||||
-#define HNS_ROCE_HW_VER2 ('h' << 24 | 'i' << 16 | '0' << 8 | '8')
|
||||
+#define HNS_ROCE_HW_VER2 0x100
|
||||
#define HNS_ROCE_HW_VER3 0x130
|
||||
|
||||
#define PFX "hns: "
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,115 +0,0 @@
|
||||
From 203675526b14d9556eeb4212536ebcfc81691c1b Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Mon, 17 Jan 2022 20:43:38 +0800
|
||||
Subject: libhns: Fix out-of-bounds write when filling inline data into
|
||||
extended sge space
|
||||
|
||||
If the buf to store inline data is in the last page of the extended sge
|
||||
space, filling the entire inline data into the extended sge space at one
|
||||
time may result in out-of-bounds writing.
|
||||
|
||||
When the remaining space at the end of the extended sge is not enough to
|
||||
accommodate the entire inline data, the inline data needs to be filled
|
||||
into the extended sge space in two steps:
|
||||
(1) The front part of the inline data is filled into the remaining space
|
||||
at the end of the extended sge.
|
||||
(2) The remaining inline data is filled into the header space of the
|
||||
extended sge.
|
||||
|
||||
Fixes: b7814b7b9715("libhns: Support inline data in extented sge space for RC")
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 40 ++++++++++++++++++++++----------
|
||||
1 file changed, 28 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index e39ee7f..20745dc 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -772,21 +772,43 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
|
||||
struct hns_roce_sge_info *sge_info)
|
||||
{
|
||||
unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg);
|
||||
- void *dseg;
|
||||
+ unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
|
||||
+ void *dst_addr, *src_addr, *tail_bound_addr;
|
||||
+ uint32_t src_len, tail_len;
|
||||
int i;
|
||||
|
||||
+
|
||||
if (sge_info->total_len > qp->sq.max_gs * sge_sz)
|
||||
return EINVAL;
|
||||
|
||||
- dseg = get_send_sge_ex(qp, sge_info->start_idx);
|
||||
+ dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
|
||||
+ tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask);
|
||||
|
||||
for (i = 0; i < wr->num_sge; i++) {
|
||||
- memcpy(dseg, (void *)(uintptr_t)wr->sg_list[i].addr,
|
||||
- wr->sg_list[i].length);
|
||||
- dseg += wr->sg_list[i].length;
|
||||
+ tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr;
|
||||
+
|
||||
+ src_addr = (void *)(uintptr_t)wr->sg_list[i].addr;
|
||||
+ src_len = wr->sg_list[i].length;
|
||||
+
|
||||
+ if (src_len < tail_len) {
|
||||
+ memcpy(dst_addr, src_addr, src_len);
|
||||
+ dst_addr += src_len;
|
||||
+ } else if (src_len == tail_len) {
|
||||
+ memcpy(dst_addr, src_addr, src_len);
|
||||
+ dst_addr = get_send_sge_ex(qp, 0);
|
||||
+ } else {
|
||||
+ memcpy(dst_addr, src_addr, tail_len);
|
||||
+ dst_addr = get_send_sge_ex(qp, 0);
|
||||
+ src_addr += tail_len;
|
||||
+ src_len -= tail_len;
|
||||
+
|
||||
+ memcpy(dst_addr, src_addr, src_len);
|
||||
+ dst_addr += src_len;
|
||||
+ }
|
||||
}
|
||||
|
||||
- sge_info->start_idx += DIV_ROUND_UP(sge_info->total_len, sge_sz);
|
||||
+ sge_info->valid_num = DIV_ROUND_UP(sge_info->total_len, sge_sz);
|
||||
+ sge_info->start_idx += sge_info->valid_num;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -828,7 +850,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
||||
struct hns_roce_sge_info *sge_info)
|
||||
{
|
||||
- unsigned int sge_idx = sge_info->start_idx;
|
||||
int ret;
|
||||
|
||||
if (!check_inl_data_len(qp, sge_info->total_len))
|
||||
@@ -845,8 +866,6 @@ static int set_ud_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- sge_info->valid_num = sge_info->start_idx - sge_idx;
|
||||
-
|
||||
hr_reg_write(ud_sq_wqe, UDWQE_SGE_NUM, sge_info->valid_num);
|
||||
}
|
||||
|
||||
@@ -969,7 +988,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
struct hns_roce_rc_sq_wqe *rc_sq_wqe,
|
||||
struct hns_roce_sge_info *sge_info)
|
||||
{
|
||||
- unsigned int sge_idx = sge_info->start_idx;
|
||||
void *dseg = rc_sq_wqe;
|
||||
int ret;
|
||||
int i;
|
||||
@@ -997,8 +1015,6 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- sge_info->valid_num = sge_info->start_idx - sge_idx;
|
||||
-
|
||||
hr_reg_write(rc_sq_wqe, RCWQE_SGE_NUM, sge_info->valid_num);
|
||||
}
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,171 +0,0 @@
|
||||
From 85a5aa79327f45e4bea8d7ad0e55842225ca676a Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 18 Jan 2022 19:58:51 +0800
|
||||
Subject: libhns: Clear remaining unused sges when post recv
|
||||
|
||||
The HIP09 requires the driver to clear the unused data segments in wqe
|
||||
buffer to make the hns ROCEE stop reading the remaining invalid sges for
|
||||
RQ.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 88 ++++++++++++++------------------
|
||||
1 file changed, 39 insertions(+), 49 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 20745dc..6b0d7f1 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -85,14 +85,6 @@ static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
dseg->len = htole32(sg->length);
|
||||
}
|
||||
|
||||
-/* Fill an ending sge to make hw stop reading the remaining sges in wqe */
|
||||
-static inline void set_ending_data_seg(struct hns_roce_v2_wqe_data_seg *dseg)
|
||||
-{
|
||||
- dseg->lkey = htole32(0x0);
|
||||
- dseg->addr = 0;
|
||||
- dseg->len = htole32(INVALID_SGE_LENGTH);
|
||||
-}
|
||||
-
|
||||
static void set_extend_atomic_seg(struct hns_roce_qp *qp, unsigned int sge_cnt,
|
||||
struct hns_roce_sge_info *sge_info, void *buf)
|
||||
{
|
||||
@@ -1247,23 +1239,43 @@ static int check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr,
|
||||
- unsigned int wqe_idx)
|
||||
+static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
|
||||
+ unsigned int max_sge, bool rsv)
|
||||
{
|
||||
- struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
- struct hns_roce_rinl_sge *sge_list;
|
||||
- int i;
|
||||
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
|
||||
+ unsigned int i, cnt;
|
||||
|
||||
- dseg = get_recv_wqe_v2(qp, wqe_idx);
|
||||
- for (i = 0; i < wr->num_sge; i++) {
|
||||
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
|
||||
+ /* Skip zero-length sge */
|
||||
if (!wr->sg_list[i].length)
|
||||
continue;
|
||||
- set_data_seg_v2(dseg, wr->sg_list + i);
|
||||
- dseg++;
|
||||
+
|
||||
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
|
||||
+ cnt++;
|
||||
}
|
||||
|
||||
- if (qp->rq.rsv_sge)
|
||||
- set_ending_data_seg(dseg);
|
||||
+ /* Fill a reserved sge to make ROCEE stop reading remaining segments */
|
||||
+ if (rsv) {
|
||||
+ dseg[cnt].lkey = 0;
|
||||
+ dseg[cnt].addr = 0;
|
||||
+ dseg[cnt].len = htole32(INVALID_SGE_LENGTH);
|
||||
+ } else {
|
||||
+ /* Clear remaining segments to make ROCEE ignore sges */
|
||||
+ if (cnt < max_sge)
|
||||
+ memset(dseg + cnt, 0,
|
||||
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr,
|
||||
+ unsigned int wqe_idx, unsigned int max_sge)
|
||||
+{
|
||||
+ struct hns_roce_rinl_sge *sge_list;
|
||||
+ unsigned int i;
|
||||
+ void *wqe;
|
||||
+
|
||||
+ wqe = get_recv_wqe_v2(qp, wqe_idx);
|
||||
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, qp->rq.rsv_sge);
|
||||
|
||||
if (!qp->rq_rinl_buf.wqe_cnt)
|
||||
return;
|
||||
@@ -1310,7 +1322,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
|
||||
}
|
||||
|
||||
wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1);
|
||||
- fill_rq_wqe(qp, wr, wqe_idx);
|
||||
+ fill_rq_wqe(qp, wr, wqe_idx, max_sge);
|
||||
qp->rq.wrid[wqe_idx] = wr->wr_id;
|
||||
}
|
||||
|
||||
@@ -1536,10 +1548,8 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
||||
}
|
||||
|
||||
static int check_post_srq_valid(struct hns_roce_srq *srq,
|
||||
- struct ibv_recv_wr *wr)
|
||||
+ struct ibv_recv_wr *wr, unsigned int max_sge)
|
||||
{
|
||||
- unsigned int max_sge = srq->max_gs - srq->rsv_sge;
|
||||
-
|
||||
if (hns_roce_v2_srqwq_overflow(srq))
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -1575,28 +1585,6 @@ static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx,
|
||||
- struct ibv_recv_wr *wr)
|
||||
-{
|
||||
- struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
- int i;
|
||||
-
|
||||
- dseg = get_srq_wqe(srq, wqe_idx);
|
||||
-
|
||||
- for (i = 0; i < wr->num_sge; ++i) {
|
||||
- dseg[i].len = htole32(wr->sg_list[i].length);
|
||||
- dseg[i].lkey = htole32(wr->sg_list[i].lkey);
|
||||
- dseg[i].addr = htole64(wr->sg_list[i].addr);
|
||||
- }
|
||||
-
|
||||
- /* hw stop reading when identify the last one */
|
||||
- if (srq->rsv_sge) {
|
||||
- dseg[i].len = htole32(INVALID_SGE_LENGTH);
|
||||
- dseg[i].lkey = htole32(0x0);
|
||||
- dseg[i].addr = 0;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
@@ -1624,15 +1612,16 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
{
|
||||
struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context);
|
||||
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
|
||||
+ unsigned int wqe_idx, max_sge, nreq;
|
||||
struct hns_roce_db srq_db;
|
||||
- unsigned int wqe_idx;
|
||||
int ret = 0;
|
||||
- int nreq;
|
||||
+ void *wqe;
|
||||
|
||||
pthread_spin_lock(&srq->lock);
|
||||
|
||||
+ max_sge = srq->max_gs - srq->rsv_sge;
|
||||
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||
- ret = check_post_srq_valid(srq, wr);
|
||||
+ ret = check_post_srq_valid(srq, wr, max_sge);
|
||||
if (ret) {
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
@@ -1644,7 +1633,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
break;
|
||||
}
|
||||
|
||||
- fill_srq_wqe(srq, wqe_idx, wr);
|
||||
+ wqe = get_srq_wqe(srq, wqe_idx);
|
||||
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
|
||||
fill_wqe_idx(srq, wqe_idx);
|
||||
|
||||
srq->wrid[wqe_idx] = wr->wr_id;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,346 +0,0 @@
|
||||
From d8596eff4eb46d1db1b6066e3bbbd03976f49e58 Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Mon, 7 Mar 2022 18:49:35 +0800
|
||||
Subject: libhns: Add support for creating extended CQ
|
||||
|
||||
The driver supports ibv_create_cq_ex() to create extended CQ. But the
|
||||
driver does not yet support the extended attributes specified by
|
||||
attr->comp_mask and attr->wc_flags.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.c | 1 +
|
||||
providers/hns/hns_roce_u.h | 6 ++-
|
||||
providers/hns/hns_roce_u_abi.h | 5 +++
|
||||
providers/hns/hns_roce_u_hw_v1.c | 20 +++++-----
|
||||
providers/hns/hns_roce_u_hw_v2.c | 16 ++++----
|
||||
providers/hns/hns_roce_u_verbs.c | 63 ++++++++++++++++++++++----------
|
||||
6 files changed, 72 insertions(+), 39 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
|
||||
index 6eac4ff0..f8a647b8 100644
|
||||
--- a/providers/hns/hns_roce_u.c
|
||||
+++ b/providers/hns/hns_roce_u.c
|
||||
@@ -66,6 +66,7 @@ static const struct verbs_context_ops hns_common_ops = {
|
||||
.bind_mw = hns_roce_u_bind_mw,
|
||||
.cq_event = hns_roce_u_cq_event,
|
||||
.create_cq = hns_roce_u_create_cq,
|
||||
+ .create_cq_ex = hns_roce_u_create_cq_ex,
|
||||
.create_qp = hns_roce_u_create_qp,
|
||||
.create_qp_ex = hns_roce_u_create_qp_ex,
|
||||
.dealloc_mw = hns_roce_u_dealloc_mw,
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 2b4ba181..505e7498 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -236,7 +236,7 @@ struct hns_roce_pd {
|
||||
};
|
||||
|
||||
struct hns_roce_cq {
|
||||
- struct ibv_cq ibv_cq;
|
||||
+ struct verbs_cq verbs_cq;
|
||||
struct hns_roce_buf buf;
|
||||
pthread_spinlock_t lock;
|
||||
unsigned int cqn;
|
||||
@@ -406,7 +406,7 @@ static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
|
||||
|
||||
static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
|
||||
{
|
||||
- return container_of(ibv_cq, struct hns_roce_cq, ibv_cq);
|
||||
+ return container_of(ibv_cq, struct hns_roce_cq, verbs_cq.cq);
|
||||
}
|
||||
|
||||
static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq)
|
||||
@@ -447,6 +447,8 @@ int hns_roce_u_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
|
||||
struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
|
||||
struct ibv_comp_channel *channel,
|
||||
int comp_vector);
|
||||
+struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context,
|
||||
+ struct ibv_cq_init_attr_ex *cq_attr);
|
||||
|
||||
int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr);
|
||||
int hns_roce_u_destroy_cq(struct ibv_cq *cq);
|
||||
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
|
||||
index e56f9d35..333f977e 100644
|
||||
--- a/providers/hns/hns_roce_u_abi.h
|
||||
+++ b/providers/hns/hns_roce_u_abi.h
|
||||
@@ -39,8 +39,13 @@
|
||||
|
||||
DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
|
||||
empty, hns_roce_ib_alloc_pd_resp);
|
||||
+
|
||||
DECLARE_DRV_CMD(hns_roce_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
|
||||
hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp);
|
||||
+
|
||||
+DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
|
||||
+ hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp);
|
||||
+
|
||||
DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
|
||||
empty, hns_roce_ib_alloc_ucontext_resp);
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 28ad482c..d47cba0c 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -161,10 +161,10 @@ static struct hns_roce_cqe *get_cqe(struct hns_roce_cq *cq, int entry)
|
||||
|
||||
static void *get_sw_cqe(struct hns_roce_cq *cq, int n)
|
||||
{
|
||||
- struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
|
||||
+ struct hns_roce_cqe *cqe = get_cqe(cq, n & cq->verbs_cq.cq.cqe);
|
||||
|
||||
return (!!(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S)) ^
|
||||
- !!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL;
|
||||
+ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL;
|
||||
}
|
||||
|
||||
static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *cq)
|
||||
@@ -210,7 +210,7 @@ static int hns_roce_wq_overflow(struct hns_roce_wq *wq, int nreq,
|
||||
cur = wq->head - wq->tail;
|
||||
pthread_spin_unlock(&cq->lock);
|
||||
|
||||
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context),
|
||||
"wq:(head = %d, tail = %d, max_post = %d), nreq = 0x%x\n",
|
||||
wq->head, wq->tail, wq->max_post, nreq);
|
||||
|
||||
@@ -274,10 +274,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
|
||||
if (!*cur_qp ||
|
||||
(local_qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->verbs_qp.qp.qp_num) {
|
||||
|
||||
- *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context),
|
||||
+ *cur_qp = hns_roce_find_qp(to_hr_ctx(cq->verbs_cq.cq.context),
|
||||
qpn & 0xffffff);
|
||||
if (!*cur_qp) {
|
||||
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context),
|
||||
PFX "can't find qp!\n");
|
||||
return CQ_POLL_ERR;
|
||||
}
|
||||
@@ -317,7 +317,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
|
||||
if (roce_get_field(cqe->cqe_byte_4,
|
||||
CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
|
||||
CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) != HNS_ROCE_CQE_SUCCESS) {
|
||||
- verbs_err(verbs_get_ctx(cq->ibv_cq.context),
|
||||
+ verbs_err(verbs_get_ctx(cq->verbs_cq.cq.context),
|
||||
PFX "error cqe!\n");
|
||||
hns_roce_handle_error_cqe(cqe, wc);
|
||||
return CQ_OK;
|
||||
@@ -599,21 +599,21 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||
uint32_t prod_index;
|
||||
uint8_t owner_bit = 0;
|
||||
struct hns_roce_cqe *cqe, *dest;
|
||||
- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
|
||||
|
||||
for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index);
|
||||
++prod_index)
|
||||
- if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
|
||||
+ if (prod_index == cq->cons_index + cq->verbs_cq.cq.cqe)
|
||||
break;
|
||||
|
||||
while ((int) --prod_index - (int) cq->cons_index >= 0) {
|
||||
- cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
|
||||
+ cqe = get_cqe(cq, prod_index & cq->verbs_cq.cq.cqe);
|
||||
if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
|
||||
CQE_BYTE_16_LOCAL_QPN_S) & 0xffffff) == qpn) {
|
||||
++nfreed;
|
||||
} else if (nfreed) {
|
||||
dest = get_cqe(cq,
|
||||
- (prod_index + nfreed) & cq->ibv_cq.cqe);
|
||||
+ (prod_index + nfreed) & cq->verbs_cq.cq.cqe);
|
||||
owner_bit = roce_get_bit(dest->cqe_byte_4,
|
||||
CQE_BYTE_4_OWNER_S);
|
||||
memcpy(dest, cqe, sizeof(*cqe));
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index bfd98760..07f3596d 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -189,10 +189,10 @@ static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
|
||||
|
||||
static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n)
|
||||
{
|
||||
- struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->ibv_cq.cqe);
|
||||
+ struct hns_roce_v2_cqe *cqe = get_cqe_v2(cq, n & cq->verbs_cq.cq.cqe);
|
||||
|
||||
- return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & (cq->ibv_cq.cqe + 1))) ?
|
||||
- cqe : NULL;
|
||||
+ return (hr_reg_read(cqe, CQE_OWNER) ^
|
||||
+ !!(n & (cq->verbs_cq.cq.cqe + 1))) ? cqe : NULL;
|
||||
}
|
||||
|
||||
static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
|
||||
@@ -556,7 +556,7 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
|
||||
{
|
||||
- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
|
||||
struct hns_roce_srq *srq = NULL;
|
||||
struct hns_roce_v2_cqe *cqe;
|
||||
uint8_t opcode;
|
||||
@@ -1356,15 +1356,15 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||
uint16_t wqe_index;
|
||||
uint32_t prod_index;
|
||||
struct hns_roce_v2_cqe *cqe, *dest;
|
||||
- struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
|
||||
|
||||
for (prod_index = cq->cons_index; get_sw_cqe_v2(cq, prod_index);
|
||||
++prod_index)
|
||||
- if (prod_index > cq->cons_index + cq->ibv_cq.cqe)
|
||||
+ if (prod_index > cq->cons_index + cq->verbs_cq.cq.cqe)
|
||||
break;
|
||||
|
||||
while ((int) --prod_index - (int) cq->cons_index >= 0) {
|
||||
- cqe = get_cqe_v2(cq, prod_index & cq->ibv_cq.cqe);
|
||||
+ cqe = get_cqe_v2(cq, prod_index & cq->verbs_cq.cq.cqe);
|
||||
if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
|
||||
is_recv_cqe = hr_reg_read(cqe, CQE_S_R);
|
||||
|
||||
@@ -1375,7 +1375,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||
++nfreed;
|
||||
} else if (nfreed) {
|
||||
dest = get_cqe_v2(cq,
|
||||
- (prod_index + nfreed) & cq->ibv_cq.cqe);
|
||||
+ (prod_index + nfreed) & cq->verbs_cq.cq.cqe);
|
||||
owner_bit = hr_reg_read(dest, CQE_OWNER);
|
||||
memcpy(dest, cqe, cq->cqe_size);
|
||||
hr_reg_write_bool(dest, CQE_OWNER, owner_bit);
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 3cc9e0c2..a993c39a 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -276,12 +276,17 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int hns_roce_verify_cq(int *cqe, struct hns_roce_context *context)
|
||||
+static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
|
||||
+ struct hns_roce_context *context)
|
||||
{
|
||||
- if (*cqe < 1 || *cqe > context->max_cqe)
|
||||
+ if (!attr->cqe || attr->cqe > context->max_cqe)
|
||||
return -EINVAL;
|
||||
|
||||
- *cqe = max((uint64_t)HNS_ROCE_MIN_CQE_NUM, roundup_pow_of_two(*cqe));
|
||||
+ if (attr->comp_mask || attr->wc_flags)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
|
||||
+ roundup_pow_of_two(attr->cqe));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -297,25 +302,25 @@ static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
|
||||
}
|
||||
|
||||
static int exec_cq_create_cmd(struct ibv_context *context,
|
||||
- struct hns_roce_cq *cq, int cqe,
|
||||
- struct ibv_comp_channel *channel, int comp_vector)
|
||||
+ struct hns_roce_cq *cq,
|
||||
+ struct ibv_cq_init_attr_ex *attr)
|
||||
{
|
||||
+ struct hns_roce_create_cq_ex_resp resp_ex = {};
|
||||
struct hns_roce_ib_create_cq_resp *resp_drv;
|
||||
- struct hns_roce_create_cq_resp resp = {};
|
||||
+ struct hns_roce_create_cq_ex cmd_ex = {};
|
||||
struct hns_roce_ib_create_cq *cmd_drv;
|
||||
- struct hns_roce_create_cq cmd = {};
|
||||
int ret;
|
||||
|
||||
- cmd_drv = &cmd.drv_payload;
|
||||
- resp_drv = &resp.drv_payload;
|
||||
+ cmd_drv = &cmd_ex.drv_payload;
|
||||
+ resp_drv = &resp_ex.drv_payload;
|
||||
|
||||
cmd_drv->buf_addr = (uintptr_t)cq->buf.buf;
|
||||
cmd_drv->db_addr = (uintptr_t)cq->db;
|
||||
cmd_drv->cqe_size = (uintptr_t)cq->cqe_size;
|
||||
|
||||
- ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
|
||||
- &cq->ibv_cq, &cmd.ibv_cmd, sizeof(cmd),
|
||||
- &resp.ibv_resp, sizeof(resp));
|
||||
+ ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq,
|
||||
+ &cmd_ex.ibv_cmd, sizeof(cmd_ex),
|
||||
+ &resp_ex.ibv_resp, sizeof(resp_ex), 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -325,16 +330,15 @@ static int exec_cq_create_cmd(struct ibv_context *context,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
|
||||
- struct ibv_comp_channel *channel,
|
||||
- int comp_vector)
|
||||
+static struct ibv_cq_ex *create_cq(struct ibv_context *context,
|
||||
+ struct ibv_cq_init_attr_ex *attr)
|
||||
{
|
||||
struct hns_roce_device *hr_dev = to_hr_dev(context->device);
|
||||
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
|
||||
struct hns_roce_cq *cq;
|
||||
int ret;
|
||||
|
||||
- ret = hns_roce_verify_cq(&cqe, hr_ctx);
|
||||
+ ret = verify_cq_create_attr(attr, hr_ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@@ -348,7 +352,7 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
|
||||
if (ret)
|
||||
goto err_lock;
|
||||
|
||||
- cq->cq_depth = cqe;
|
||||
+ cq->cq_depth = attr->cqe;
|
||||
cq->cqe_size = hr_ctx->cqe_size;
|
||||
|
||||
ret = hns_roce_alloc_cq_buf(cq);
|
||||
@@ -363,13 +367,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
|
||||
|
||||
*cq->db = 0;
|
||||
|
||||
- ret = exec_cq_create_cmd(context, cq, cqe, channel, comp_vector);
|
||||
+ ret = exec_cq_create_cmd(context, cq, attr);
|
||||
if (ret)
|
||||
goto err_cmd;
|
||||
|
||||
cq->arm_sn = 1;
|
||||
|
||||
- return &cq->ibv_cq;
|
||||
+ return &cq->verbs_cq.cq_ex;
|
||||
|
||||
err_cmd:
|
||||
if (hr_dev->hw_version != HNS_ROCE_HW_VER1)
|
||||
@@ -387,6 +391,27 @@ err:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
|
||||
+ struct ibv_comp_channel *channel,
|
||||
+ int comp_vector)
|
||||
+{
|
||||
+ struct ibv_cq_ex *cq;
|
||||
+ struct ibv_cq_init_attr_ex attr = {
|
||||
+ .cqe = cqe,
|
||||
+ .channel = channel,
|
||||
+ .comp_vector = comp_vector,
|
||||
+ };
|
||||
+
|
||||
+ cq = create_cq(context, &attr);
|
||||
+ return cq ? ibv_cq_ex_to_cq(cq) : NULL;
|
||||
+}
|
||||
+
|
||||
+struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context,
|
||||
+ struct ibv_cq_init_attr_ex *attr)
|
||||
+{
|
||||
+ return create_cq(context, attr);
|
||||
+}
|
||||
+
|
||||
void hns_roce_u_cq_event(struct ibv_cq *cq)
|
||||
{
|
||||
to_hr_cq(cq)->arm_sn++;
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,415 +0,0 @@
|
||||
From 0464e0cb0416d679aba3b58261bbd2cadb74fd03 Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Mon, 7 Mar 2022 18:49:36 +0800
|
||||
Subject: libhns: Extended CQ supports the new polling mechanism
|
||||
|
||||
ofed provides new polling APIs for extended CQ. With the new APIs, users
|
||||
can poll the extended CQ more efficiently.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 1 +
|
||||
providers/hns/hns_roce_u_hw_v2.c | 319 +++++++++++++++++++++++++++++++
|
||||
providers/hns/hns_roce_u_hw_v2.h | 1 +
|
||||
providers/hns/hns_roce_u_verbs.c | 18 +-
|
||||
4 files changed, 337 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 505e7498..70ac6e5b 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -247,6 +247,7 @@ struct hns_roce_cq {
|
||||
int arm_sn;
|
||||
unsigned long flags;
|
||||
unsigned int cqe_size;
|
||||
+ struct hns_roce_v2_cqe *cqe;
|
||||
};
|
||||
|
||||
struct hns_roce_idx_que {
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 07f3596d..081ab1f3 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1657,6 +1657,325 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
|
||||
+ struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ struct hns_roce_wq *wq = &hr_qp->sq;
|
||||
+
|
||||
+ if (hr_qp->sq_signal_bits)
|
||||
+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
|
||||
+
|
||||
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
+ ++wq->tail;
|
||||
+}
|
||||
+
|
||||
+static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
|
||||
+ struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
|
||||
+ hns_roce_free_srq_wqe(srq, wqe_idx);
|
||||
+}
|
||||
+
|
||||
+static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ struct hns_roce_wq *wq = &hr_qp->rq;
|
||||
+
|
||||
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
+ ++wq->tail;
|
||||
+}
|
||||
+
|
||||
+static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
|
||||
+ struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ struct hns_roce_v2_cqe *cqe = cq->cqe;
|
||||
+ struct hns_roce_srq *srq = NULL;
|
||||
+ uint32_t wqe_idx;
|
||||
+
|
||||
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
|
||||
+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
|
||||
+ cqe_proc_sq(qp, wqe_idx, cq);
|
||||
+ } else {
|
||||
+ if (get_srq_from_cqe(cqe, ctx, qp, &srq))
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
+
|
||||
+ if (srq)
|
||||
+ cqe_proc_srq(srq, wqe_idx, cq);
|
||||
+ else
|
||||
+ cqe_proc_rq(qp, cq);
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ static const struct {
|
||||
+ unsigned int cqe_status;
|
||||
+ enum ibv_wc_status wc_status;
|
||||
+ } map[] = {
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR,
|
||||
+ IBV_WC_RETRY_EXC_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
|
||||
+ };
|
||||
+
|
||||
+ cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR;
|
||||
+ for (i = 0; i < ARRAY_SIZE(map); i++) {
|
||||
+ if (status == map[i].cqe_status) {
|
||||
+ cq->verbs_cq.cq_ex.status = map[i].wc_status;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ struct hns_roce_qp *qp = NULL;
|
||||
+ struct hns_roce_v2_cqe *cqe;
|
||||
+ uint8_t status;
|
||||
+ uint32_t qpn;
|
||||
+
|
||||
+ cqe = next_cqe_sw_v2(cq);
|
||||
+ if (!cqe)
|
||||
+ return ENOENT;
|
||||
+
|
||||
+ ++cq->cons_index;
|
||||
+ udma_from_device_barrier();
|
||||
+
|
||||
+ cq->cqe = cqe;
|
||||
+ qpn = hr_reg_read(cqe, CQE_LCL_QPN);
|
||||
+
|
||||
+ qp = hns_roce_v2_find_qp(ctx, qpn);
|
||||
+ if (!qp)
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
+
|
||||
+ if (cqe_proc_wq(ctx, qp, cq))
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
+
|
||||
+ status = hr_reg_read(cqe, CQE_STATUS);
|
||||
+
|
||||
+ /*
|
||||
+ * once a cqe in error status, the driver needs to help the HW to
|
||||
+ * generated flushed cqes for all subsequent wqes
|
||||
+ */
|
||||
+ if (status != HNS_ROCE_V2_CQE_SUCCESS) {
|
||||
+ handle_error_cqe_ex(cq, status);
|
||||
+ return hns_roce_flush_cqe(qp, status);
|
||||
+ }
|
||||
+
|
||||
+ cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS;
|
||||
+
|
||||
+ return V2_CQ_OK;
|
||||
+}
|
||||
+
|
||||
+static int wc_start_poll_cq(struct ibv_cq_ex *current,
|
||||
+ struct ibv_poll_cq_attr *attr)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
|
||||
+ int err;
|
||||
+
|
||||
+ if (attr->comp_mask)
|
||||
+ return EINVAL;
|
||||
+
|
||||
+ pthread_spin_lock(&cq->lock);
|
||||
+
|
||||
+ err = wc_poll_cqe(ctx, cq);
|
||||
+ if (err != V2_CQ_OK)
|
||||
+ pthread_spin_unlock(&cq->lock);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int wc_next_poll_cq(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
|
||||
+ int err;
|
||||
+
|
||||
+ err = wc_poll_cqe(ctx, cq);
|
||||
+ if (err != V2_CQ_OK)
|
||||
+ return err;
|
||||
+
|
||||
+ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
|
||||
+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
|
||||
+ else
|
||||
+ update_cq_db(ctx, cq);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void wc_end_poll_cq(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
|
||||
+
|
||||
+ if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
|
||||
+ *cq->db = cq->cons_index & RECORD_DB_CI_MASK;
|
||||
+ else
|
||||
+ update_cq_db(ctx, cq);
|
||||
+
|
||||
+ pthread_spin_unlock(&cq->lock);
|
||||
+}
|
||||
+
|
||||
+static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE);
|
||||
+
|
||||
+ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ)
|
||||
+ return wc_send_op_map[opcode];
|
||||
+ else
|
||||
+ return wc_rcv_op_map[opcode];
|
||||
+}
|
||||
+
|
||||
+static uint32_t wc_read_vendor_err(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+
|
||||
+ return hr_reg_read(cq->cqe, CQE_SUB_STATUS);
|
||||
+}
|
||||
+
|
||||
+static uint32_t wc_read_byte_len(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+
|
||||
+ return le32toh(cq->cqe->byte_cnt);
|
||||
+}
|
||||
+
|
||||
+static __be32 wc_read_imm_data(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+
|
||||
+ if (hr_reg_read(cq->cqe, CQE_OPCODE) == HNS_ROCE_RECV_OP_SEND_WITH_INV)
|
||||
+ /* This is returning invalidate_rkey which is in host order, see
|
||||
+ * ibv_wc_read_invalidated_rkey.
|
||||
+ */
|
||||
+ return (__force __be32)le32toh(cq->cqe->rkey);
|
||||
+
|
||||
+ return htobe32(le32toh(cq->cqe->immtdata));
|
||||
+}
|
||||
+
|
||||
+static uint32_t wc_read_qp_num(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+
|
||||
+ return hr_reg_read(cq->cqe, CQE_LCL_QPN);
|
||||
+}
|
||||
+
|
||||
+static uint32_t wc_read_src_qp(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+
|
||||
+ return hr_reg_read(cq->cqe, CQE_RMT_QPN);
|
||||
+}
|
||||
+
|
||||
+static unsigned int get_wc_flags_for_sq(uint8_t opcode)
|
||||
+{
|
||||
+ switch (opcode) {
|
||||
+ case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
|
||||
+ case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
|
||||
+ return IBV_WC_WITH_IMM;
|
||||
+ case HNS_ROCE_SQ_OP_LOCAL_INV:
|
||||
+ return IBV_WC_WITH_INV;
|
||||
+ default:
|
||||
+ return 0;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static unsigned int get_wc_flags_for_rq(uint8_t opcode)
|
||||
+{
|
||||
+ switch (opcode) {
|
||||
+ case HNS_ROCE_RECV_OP_RDMA_WRITE_IMM:
|
||||
+ case HNS_ROCE_RECV_OP_SEND_WITH_IMM:
|
||||
+ return IBV_WC_WITH_IMM;
|
||||
+ case HNS_ROCE_RECV_OP_SEND_WITH_INV:
|
||||
+ return IBV_WC_WITH_INV;
|
||||
+ default:
|
||||
+ return 0;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static unsigned int wc_read_wc_flags(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+ uint8_t opcode = hr_reg_read(cq->cqe, CQE_OPCODE);
|
||||
+ unsigned int wc_flags;
|
||||
+
|
||||
+ if (hr_reg_read(cq->cqe, CQE_S_R) == CQE_FOR_SQ) {
|
||||
+ wc_flags = get_wc_flags_for_sq(opcode);
|
||||
+ } else {
|
||||
+ wc_flags = get_wc_flags_for_rq(opcode);
|
||||
+ wc_flags |= hr_reg_read(cq->cqe, CQE_GRH) ? IBV_WC_GRH : 0;
|
||||
+ }
|
||||
+
|
||||
+ return wc_flags;
|
||||
+}
|
||||
+
|
||||
+static uint32_t wc_read_slid(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static uint8_t wc_read_sl(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+
|
||||
+ return (uint8_t)hr_reg_read(cq->cqe, CQE_SL);
|
||||
+}
|
||||
+
|
||||
+static uint8_t wc_read_dlid_path_bits(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static uint16_t wc_read_cvlan(struct ibv_cq_ex *current)
|
||||
+{
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
+
|
||||
+ return hr_reg_read(cq->cqe, CQE_VID_VLD) ?
|
||||
+ hr_reg_read(cq->cqe, CQE_VID) : 0;
|
||||
+}
|
||||
+
|
||||
+void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags)
|
||||
+{
|
||||
+ cq_ex->start_poll = wc_start_poll_cq;
|
||||
+ cq_ex->next_poll = wc_next_poll_cq;
|
||||
+ cq_ex->end_poll = wc_end_poll_cq;
|
||||
+ cq_ex->read_opcode = wc_read_opcode;
|
||||
+ cq_ex->read_vendor_err = wc_read_vendor_err;
|
||||
+ cq_ex->read_wc_flags = wc_read_wc_flags;
|
||||
+
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
|
||||
+ cq_ex->read_byte_len = wc_read_byte_len;
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_IMM)
|
||||
+ cq_ex->read_imm_data = wc_read_imm_data;
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_QP_NUM)
|
||||
+ cq_ex->read_qp_num = wc_read_qp_num;
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_SRC_QP)
|
||||
+ cq_ex->read_src_qp = wc_read_src_qp;
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_SLID)
|
||||
+ cq_ex->read_slid = wc_read_slid;
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_SL)
|
||||
+ cq_ex->read_sl = wc_read_sl;
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
|
||||
+ cq_ex->read_dlid_path_bits = wc_read_dlid_path_bits;
|
||||
+ if (wc_flags & IBV_WC_EX_WITH_CVLAN)
|
||||
+ cq_ex->read_cvlan = wc_read_cvlan;
|
||||
+}
|
||||
+
|
||||
const struct hns_roce_u_hw hns_roce_u_hw_v2 = {
|
||||
.hw_version = HNS_ROCE_HW_VER2,
|
||||
.hw_ops = {
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index 92e5f1a4..0068f4fe 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -337,5 +337,6 @@ struct hns_roce_ud_sq_wqe {
|
||||
#define MAX_SERVICE_LEVEL 0x7
|
||||
|
||||
void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
|
||||
+void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags);
|
||||
|
||||
#endif /* _HNS_ROCE_U_HW_V2_H */
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index a993c39a..9ea8a6d3 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -276,13 +276,21 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+enum {
|
||||
+ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
|
||||
+ IBV_WC_EX_WITH_CVLAN,
|
||||
+};
|
||||
+
|
||||
static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
|
||||
struct hns_roce_context *context)
|
||||
{
|
||||
if (!attr->cqe || attr->cqe > context->max_cqe)
|
||||
return -EINVAL;
|
||||
|
||||
- if (attr->comp_mask || attr->wc_flags)
|
||||
+ if (attr->comp_mask)
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
|
||||
@@ -409,7 +417,13 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
|
||||
struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context,
|
||||
struct ibv_cq_init_attr_ex *attr)
|
||||
{
|
||||
- return create_cq(context, attr);
|
||||
+ struct ibv_cq_ex *cq;
|
||||
+
|
||||
+ cq = create_cq(context, attr);
|
||||
+ if (cq)
|
||||
+ hns_roce_attach_cq_ex_ops(cq, attr->wc_flags);
|
||||
+
|
||||
+ return cq;
|
||||
}
|
||||
|
||||
void hns_roce_u_cq_event(struct ibv_cq *cq)
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,160 +0,0 @@
|
||||
From 2d48954e9b2617cb48f7d5ba47a10ceda4e556ff Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Mon, 7 Mar 2022 18:49:37 +0800
|
||||
Subject: libhns: Optimize the error handling of CQE
|
||||
|
||||
Separate the acquisition of wc->status and wc->vendor_err to make the logic
|
||||
of error handling clearer.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 81 ++++++++------------------------
|
||||
1 file changed, 19 insertions(+), 62 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 081ab1f3..2804450d 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -146,13 +146,13 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
- uint8_t status)
|
||||
+static enum ibv_wc_status get_wc_status(uint8_t status)
|
||||
{
|
||||
static const struct {
|
||||
unsigned int cqe_status;
|
||||
enum ibv_wc_status wc_status;
|
||||
} map[] = {
|
||||
+ { HNS_ROCE_V2_CQE_SUCCESS, IBV_WC_SUCCESS },
|
||||
{ HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
|
||||
{ HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
|
||||
{ HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
|
||||
@@ -169,17 +169,12 @@ static void handle_error_cqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
{ HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
|
||||
};
|
||||
|
||||
- int i;
|
||||
-
|
||||
- wc->status = IBV_WC_GENERAL_ERR;
|
||||
- for (i = 0; i < ARRAY_SIZE(map); i++) {
|
||||
- if (status == map[i].cqe_status) {
|
||||
- wc->status = map[i].wc_status;
|
||||
- break;
|
||||
- }
|
||||
+ for (int i = 0; i < ARRAY_SIZE(map); i++) {
|
||||
+ if (status == map[i].cqe_status)
|
||||
+ return map[i].wc_status;
|
||||
}
|
||||
|
||||
- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
|
||||
+ return IBV_WC_GENERAL_ERR;
|
||||
}
|
||||
|
||||
static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
|
||||
@@ -581,7 +576,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
return V2_CQ_POLL_ERR;
|
||||
}
|
||||
|
||||
- status = hr_reg_read(cqe, CQE_STATUS);
|
||||
opcode = hr_reg_read(cqe, CQE_OPCODE);
|
||||
is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ;
|
||||
if (is_send) {
|
||||
@@ -603,18 +597,18 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
|
||||
wc->qp_num = qpn;
|
||||
|
||||
+ status = hr_reg_read(cqe, CQE_STATUS);
|
||||
+ wc->status = get_wc_status(status);
|
||||
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
|
||||
+
|
||||
+ if (status == HNS_ROCE_V2_CQE_SUCCESS)
|
||||
+ return V2_CQ_OK;
|
||||
+
|
||||
/*
|
||||
* once a cqe in error status, the driver needs to help the HW to
|
||||
* generated flushed cqes for all subsequent wqes
|
||||
*/
|
||||
- if (status != HNS_ROCE_V2_CQE_SUCCESS) {
|
||||
- handle_error_cqe(cqe, wc, status);
|
||||
- return hns_roce_flush_cqe(*cur_qp, status);
|
||||
- }
|
||||
-
|
||||
- wc->status = IBV_WC_SUCCESS;
|
||||
-
|
||||
- return V2_CQ_OK;
|
||||
+ return hns_roce_flush_cqe(*cur_qp, status);
|
||||
}
|
||||
|
||||
static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
@@ -1706,40 +1700,6 @@ static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void handle_error_cqe_ex(struct hns_roce_cq *cq, uint8_t status)
|
||||
-{
|
||||
- int i;
|
||||
-
|
||||
- static const struct {
|
||||
- unsigned int cqe_status;
|
||||
- enum ibv_wc_status wc_status;
|
||||
- } map[] = {
|
||||
- { HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR, IBV_WC_LOC_LEN_ERR },
|
||||
- { HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR },
|
||||
- { HNS_ROCE_V2_CQE_LOCAL_PROT_ERR, IBV_WC_LOC_PROT_ERR },
|
||||
- { HNS_ROCE_V2_CQE_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR },
|
||||
- { HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR, IBV_WC_MW_BIND_ERR },
|
||||
- { HNS_ROCE_V2_CQE_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR },
|
||||
- { HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR },
|
||||
- { HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR, IBV_WC_REM_INV_REQ_ERR },
|
||||
- { HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR },
|
||||
- { HNS_ROCE_V2_CQE_REMOTE_OP_ERR, IBV_WC_REM_OP_ERR },
|
||||
- { HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR,
|
||||
- IBV_WC_RETRY_EXC_ERR },
|
||||
- { HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
|
||||
- { HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
|
||||
- { HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
|
||||
- };
|
||||
-
|
||||
- cq->verbs_cq.cq_ex.status = IBV_WC_GENERAL_ERR;
|
||||
- for (i = 0; i < ARRAY_SIZE(map); i++) {
|
||||
- if (status == map[i].cqe_status) {
|
||||
- cq->verbs_cq.cq_ex.status = map[i].wc_status;
|
||||
- break;
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
|
||||
{
|
||||
struct hns_roce_qp *qp = NULL;
|
||||
@@ -1765,19 +1725,16 @@ static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
|
||||
return V2_CQ_POLL_ERR;
|
||||
|
||||
status = hr_reg_read(cqe, CQE_STATUS);
|
||||
+ cq->verbs_cq.cq_ex.status = get_wc_status(status);
|
||||
+
|
||||
+ if (status == HNS_ROCE_V2_CQE_SUCCESS)
|
||||
+ return V2_CQ_OK;
|
||||
|
||||
/*
|
||||
* once a cqe in error status, the driver needs to help the HW to
|
||||
* generated flushed cqes for all subsequent wqes
|
||||
*/
|
||||
- if (status != HNS_ROCE_V2_CQE_SUCCESS) {
|
||||
- handle_error_cqe_ex(cq, status);
|
||||
- return hns_roce_flush_cqe(qp, status);
|
||||
- }
|
||||
-
|
||||
- cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS;
|
||||
-
|
||||
- return V2_CQ_OK;
|
||||
+ return hns_roce_flush_cqe(qp, status);
|
||||
}
|
||||
|
||||
static int wc_start_poll_cq(struct ibv_cq_ex *current,
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,323 +0,0 @@
|
||||
From 9dd7b55957ccc720a6844613af9d43680d8fbaad Mon Sep 17 00:00:00 2001
|
||||
From: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Date: Mon, 7 Mar 2022 18:49:38 +0800
|
||||
Subject: libhns: Refactor hns roce v2 poll one() and wc poll cqe()
|
||||
|
||||
hns_roce_v2_poll_one() and wc_poll_cqe() have a lot of repetitive code.
|
||||
Aggregating the repetitive parts of these two functions into one function
|
||||
hns_roce_poll_one() can reduce the repetitive code.
|
||||
|
||||
Signed-off-by: Xinhao Liu <liuxinhao5@hisilicon.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 228 +++++++++++++++----------------
|
||||
1 file changed, 107 insertions(+), 121 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 2804450d..42a77151 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -285,6 +285,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
|
||||
|
||||
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
struct hns_roce_qp *qp)
|
||||
+
|
||||
{
|
||||
struct hns_roce_db sq_db = {};
|
||||
|
||||
@@ -548,21 +549,101 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
|
||||
wc->opcode = wc_send_op_map[opcode];
|
||||
}
|
||||
|
||||
-static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
- struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
|
||||
+static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
|
||||
+ struct hns_roce_cq *cq)
|
||||
{
|
||||
- struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
|
||||
+ struct hns_roce_wq *wq = &hr_qp->sq;
|
||||
+
|
||||
+ if (hr_qp->sq_signal_bits)
|
||||
+ wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
|
||||
+
|
||||
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
+ ++wq->tail;
|
||||
+}
|
||||
+
|
||||
+static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
|
||||
+ struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
|
||||
+ hns_roce_free_srq_wqe(srq, wqe_idx);
|
||||
+}
|
||||
+
|
||||
+static void cqe_proc_rq(struct hns_roce_wq *wq, struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
+ ++wq->tail;
|
||||
+}
|
||||
+
|
||||
+static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
|
||||
+ struct hns_roce_cq *cq)
|
||||
+{
|
||||
+ struct hns_roce_v2_cqe *cqe = cq->cqe;
|
||||
+ struct hns_roce_srq *srq = NULL;
|
||||
+ uint32_t wqe_idx;
|
||||
+
|
||||
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
|
||||
+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
|
||||
+ cqe_proc_sq(qp, wqe_idx, cq);
|
||||
+ } else {
|
||||
+ if (get_srq_from_cqe(cqe, ctx, qp, &srq))
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
+
|
||||
+ if (srq)
|
||||
+ cqe_proc_srq(srq, wqe_idx, cq);
|
||||
+ else
|
||||
+ cqe_proc_rq(&qp->rq, cq);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int parse_cqe_for_cq(struct hns_roce_context *ctx, struct hns_roce_cq *cq,
|
||||
+ struct hns_roce_qp *cur_qp, struct ibv_wc *wc)
|
||||
+{
|
||||
+ struct hns_roce_v2_cqe *cqe = cq->cqe;
|
||||
struct hns_roce_srq *srq = NULL;
|
||||
- struct hns_roce_v2_cqe *cqe;
|
||||
uint8_t opcode;
|
||||
- uint8_t status;
|
||||
+
|
||||
+ if (!wc) {
|
||||
+ if (cqe_proc_wq(ctx, cur_qp, cq))
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
+
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ opcode = hr_reg_read(cqe, CQE_OPCODE);
|
||||
+
|
||||
+ if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
|
||||
+ parse_cqe_for_req(cqe, wc, cur_qp, opcode);
|
||||
+ } else {
|
||||
+ wc->byte_len = le32toh(cqe->byte_cnt);
|
||||
+ get_opcode_for_resp(cqe, wc, opcode);
|
||||
+
|
||||
+ if (get_srq_from_cqe(cqe, ctx, cur_qp, &srq))
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
+
|
||||
+ if (srq)
|
||||
+ parse_cqe_for_srq(cqe, wc, srq);
|
||||
+ else
|
||||
+ parse_cqe_for_resp(cqe, wc, cur_qp, opcode);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int hns_roce_poll_one(struct hns_roce_context *ctx,
|
||||
+ struct hns_roce_qp **cur_qp, struct hns_roce_cq *cq,
|
||||
+ struct ibv_wc *wc)
|
||||
+{
|
||||
+ struct hns_roce_v2_cqe *cqe;
|
||||
+ uint8_t status, wc_status;
|
||||
uint32_t qpn;
|
||||
- bool is_send;
|
||||
|
||||
cqe = next_cqe_sw_v2(cq);
|
||||
if (!cqe)
|
||||
- return V2_CQ_EMPTY;
|
||||
+ return wc ? V2_CQ_EMPTY : ENOENT;
|
||||
|
||||
+ cq->cqe = cqe;
|
||||
++cq->cons_index;
|
||||
|
||||
udma_from_device_barrier();
|
||||
@@ -576,31 +657,20 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
return V2_CQ_POLL_ERR;
|
||||
}
|
||||
|
||||
- opcode = hr_reg_read(cqe, CQE_OPCODE);
|
||||
- is_send = hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ;
|
||||
- if (is_send) {
|
||||
- parse_cqe_for_req(cqe, wc, *cur_qp, opcode);
|
||||
- } else {
|
||||
- wc->byte_len = le32toh(cqe->byte_cnt);
|
||||
- get_opcode_for_resp(cqe, wc, opcode);
|
||||
+ if (parse_cqe_for_cq(ctx, cq, *cur_qp, wc))
|
||||
+ return V2_CQ_POLL_ERR;
|
||||
|
||||
- if (get_srq_from_cqe(cqe, ctx, *cur_qp, &srq))
|
||||
- return V2_CQ_POLL_ERR;
|
||||
+ status = hr_reg_read(cqe, CQE_STATUS);
|
||||
+ wc_status = get_wc_status(status);
|
||||
|
||||
- if (srq) {
|
||||
- parse_cqe_for_srq(cqe, wc, srq);
|
||||
- } else {
|
||||
- if (parse_cqe_for_resp(cqe, wc, *cur_qp, opcode))
|
||||
- return V2_CQ_POLL_ERR;
|
||||
- }
|
||||
+ if (wc) {
|
||||
+ wc->status = wc_status;
|
||||
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
|
||||
+ wc->qp_num = qpn;
|
||||
+ } else {
|
||||
+ cq->verbs_cq.cq_ex.status = wc_status;
|
||||
}
|
||||
|
||||
- wc->qp_num = qpn;
|
||||
-
|
||||
- status = hr_reg_read(cqe, CQE_STATUS);
|
||||
- wc->status = get_wc_status(status);
|
||||
- wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
|
||||
-
|
||||
if (status == HNS_ROCE_V2_CQE_SUCCESS)
|
||||
return V2_CQ_OK;
|
||||
|
||||
@@ -614,16 +684,16 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
|
||||
static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
struct ibv_wc *wc)
|
||||
{
|
||||
- int npolled;
|
||||
- int err = V2_CQ_OK;
|
||||
- struct hns_roce_qp *qp = NULL;
|
||||
- struct hns_roce_cq *cq = to_hr_cq(ibvcq);
|
||||
struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
|
||||
+ struct hns_roce_cq *cq = to_hr_cq(ibvcq);
|
||||
+ struct hns_roce_qp *qp = NULL;
|
||||
+ int err = V2_CQ_OK;
|
||||
+ int npolled;
|
||||
|
||||
pthread_spin_lock(&cq->lock);
|
||||
|
||||
for (npolled = 0; npolled < ne; ++npolled) {
|
||||
- err = hns_roce_v2_poll_one(cq, &qp, wc + npolled);
|
||||
+ err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
|
||||
if (err != V2_CQ_OK)
|
||||
break;
|
||||
}
|
||||
@@ -1651,97 +1721,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
|
||||
- struct hns_roce_cq *cq)
|
||||
-{
|
||||
- struct hns_roce_wq *wq = &hr_qp->sq;
|
||||
-
|
||||
- if (hr_qp->sq_signal_bits)
|
||||
- wq->tail += (wqe_idx - wq->tail) & (wq->wqe_cnt - 1);
|
||||
-
|
||||
- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
- ++wq->tail;
|
||||
-}
|
||||
-
|
||||
-static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
|
||||
- struct hns_roce_cq *cq)
|
||||
-{
|
||||
- cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
|
||||
- hns_roce_free_srq_wqe(srq, wqe_idx);
|
||||
-}
|
||||
-
|
||||
-static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq)
|
||||
-{
|
||||
- struct hns_roce_wq *wq = &hr_qp->rq;
|
||||
-
|
||||
- cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
|
||||
- ++wq->tail;
|
||||
-}
|
||||
-
|
||||
-static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
|
||||
- struct hns_roce_cq *cq)
|
||||
-{
|
||||
- struct hns_roce_v2_cqe *cqe = cq->cqe;
|
||||
- struct hns_roce_srq *srq = NULL;
|
||||
- uint32_t wqe_idx;
|
||||
-
|
||||
- wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
|
||||
- if (hr_reg_read(cqe, CQE_S_R) == CQE_FOR_SQ) {
|
||||
- cqe_proc_sq(qp, wqe_idx, cq);
|
||||
- } else {
|
||||
- if (get_srq_from_cqe(cqe, ctx, qp, &srq))
|
||||
- return V2_CQ_POLL_ERR;
|
||||
-
|
||||
- if (srq)
|
||||
- cqe_proc_srq(srq, wqe_idx, cq);
|
||||
- else
|
||||
- cqe_proc_rq(qp, cq);
|
||||
- }
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int wc_poll_cqe(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
|
||||
-{
|
||||
- struct hns_roce_qp *qp = NULL;
|
||||
- struct hns_roce_v2_cqe *cqe;
|
||||
- uint8_t status;
|
||||
- uint32_t qpn;
|
||||
-
|
||||
- cqe = next_cqe_sw_v2(cq);
|
||||
- if (!cqe)
|
||||
- return ENOENT;
|
||||
-
|
||||
- ++cq->cons_index;
|
||||
- udma_from_device_barrier();
|
||||
-
|
||||
- cq->cqe = cqe;
|
||||
- qpn = hr_reg_read(cqe, CQE_LCL_QPN);
|
||||
-
|
||||
- qp = hns_roce_v2_find_qp(ctx, qpn);
|
||||
- if (!qp)
|
||||
- return V2_CQ_POLL_ERR;
|
||||
-
|
||||
- if (cqe_proc_wq(ctx, qp, cq))
|
||||
- return V2_CQ_POLL_ERR;
|
||||
-
|
||||
- status = hr_reg_read(cqe, CQE_STATUS);
|
||||
- cq->verbs_cq.cq_ex.status = get_wc_status(status);
|
||||
-
|
||||
- if (status == HNS_ROCE_V2_CQE_SUCCESS)
|
||||
- return V2_CQ_OK;
|
||||
-
|
||||
- /*
|
||||
- * once a cqe in error status, the driver needs to help the HW to
|
||||
- * generated flushed cqes for all subsequent wqes
|
||||
- */
|
||||
- return hns_roce_flush_cqe(qp, status);
|
||||
-}
|
||||
-
|
||||
static int wc_start_poll_cq(struct ibv_cq_ex *current,
|
||||
struct ibv_poll_cq_attr *attr)
|
||||
{
|
||||
struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
struct hns_roce_context *ctx = to_hr_ctx(current->context);
|
||||
+ struct hns_roce_qp *qp = NULL;
|
||||
int err;
|
||||
|
||||
if (attr->comp_mask)
|
||||
@@ -1749,7 +1734,7 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
|
||||
|
||||
pthread_spin_lock(&cq->lock);
|
||||
|
||||
- err = wc_poll_cqe(ctx, cq);
|
||||
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
|
||||
if (err != V2_CQ_OK)
|
||||
pthread_spin_unlock(&cq->lock);
|
||||
|
||||
@@ -1760,9 +1745,10 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
|
||||
{
|
||||
struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
|
||||
struct hns_roce_context *ctx = to_hr_ctx(current->context);
|
||||
+ struct hns_roce_qp *qp = NULL;
|
||||
int err;
|
||||
|
||||
- err = wc_poll_cqe(ctx, cq);
|
||||
+ err = hns_roce_poll_one(ctx, &qp, cq, NULL);
|
||||
if (err != V2_CQ_OK)
|
||||
return err;
|
||||
|
||||
--
|
||||
2.30.0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,58 +0,0 @@
|
||||
From 1e5f8bb89169453cfdd17bf58cef7186dcf58596 Mon Sep 17 00:00:00 2001
|
||||
From: Youming Luo <luoyouming@huawei.com>
|
||||
Date: Wed, 16 Mar 2022 17:36:39 +0800
|
||||
Subject: libhns: Add general error type for CQE
|
||||
|
||||
If a Work Request posted in an RQ of UD QP isn't big enough for holding the
|
||||
incoming message, then the hns ROCEE will generate a general error CQE. The
|
||||
IB specification does not specify this type of CQE.
|
||||
|
||||
In the case of unreliable communication, it is not desirable to change the
|
||||
QP to an error state due to an insufficient receive length error. So if the
|
||||
hns ROCEE reports a general error CQE, there is no need to set the QP to an
|
||||
error state, and the driver should skip it.
|
||||
|
||||
Signed-off-by: Youming Luo <luoyouming@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 4 +++-
|
||||
providers/hns/hns_roce_u_hw_v2.h | 1 +
|
||||
2 files changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 42a77151..fab1939b 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -166,6 +166,7 @@ static enum ibv_wc_status get_wc_status(uint8_t status)
|
||||
{ HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR, IBV_WC_RETRY_EXC_ERR },
|
||||
{ HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR },
|
||||
{ HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR, IBV_WC_REM_ABORT_ERR },
|
||||
+ { HNS_ROCE_V2_CQE_GENERAL_ERR, IBV_WC_GENERAL_ERR },
|
||||
{ HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR, IBV_WC_REM_INV_RD_REQ_ERR },
|
||||
};
|
||||
|
||||
@@ -671,7 +672,8 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx,
|
||||
cq->verbs_cq.cq_ex.status = wc_status;
|
||||
}
|
||||
|
||||
- if (status == HNS_ROCE_V2_CQE_SUCCESS)
|
||||
+ if (status == HNS_ROCE_V2_CQE_SUCCESS ||
|
||||
+ status == HNS_ROCE_V2_CQE_GENERAL_ERR)
|
||||
return V2_CQ_OK;
|
||||
|
||||
/*
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index 0068f4fe..122fdbdf 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -110,6 +110,7 @@ enum {
|
||||
HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR = 0x15,
|
||||
HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR = 0x16,
|
||||
HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR = 0x22,
|
||||
+ HNS_ROCE_V2_CQE_GENERAL_ERR = 0x23,
|
||||
HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR = 0x24,
|
||||
};
|
||||
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,34 +0,0 @@
|
||||
From 847336b7634b51548996b879f42c786a108885f1 Mon Sep 17 00:00:00 2001
|
||||
From: Chengchang Tang <tangchengchang@huawei.com>
|
||||
Date: Fri, 8 Apr 2022 11:31:07 +0800
|
||||
Subject: [PATCH 46/47] libhns: Fix the shift size of SQ WQE
|
||||
|
||||
Currently, the shift size of SQ WQE is based on the size of the SQ WQE
|
||||
structure of HIP06. Although the size of SQ WQE of HIP08 is the same as
|
||||
the size of SQ WQE of HIP06, it is not a correct way for HIP08 to use the
|
||||
structure of HIP06 to define the size of SQ WQE.
|
||||
|
||||
Fixes: b6cd213b276f ("libhns: Refactor for creating qp")
|
||||
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 1457a1a2..215d82ec 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -1068,8 +1068,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
|
||||
}
|
||||
|
||||
if (attr->cap.max_send_wr) {
|
||||
- qp->sq.wqe_shift =
|
||||
- hr_ilog32(sizeof(struct hns_roce_rc_send_wqe));
|
||||
+ qp->sq.wqe_shift = HNS_ROCE_SQWQE_SHIFT;
|
||||
cnt = roundup_pow_of_two(attr->cap.max_send_wr);
|
||||
qp->sq.wqe_cnt = cnt;
|
||||
qp->sq.shift = hr_ilog32(cnt);
|
||||
--
|
||||
2.30.0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,27 +0,0 @@
|
||||
From c381cfa26ba6163b9cc51212702e64bf1d83f838 Mon Sep 17 00:00:00 2001
|
||||
From: swimlessbird <52704385+swimlessbird@users.noreply.github.com>
|
||||
Date: Fri, 17 Sep 2021 14:35:05 +0800
|
||||
Subject: [PATCH] ibdiags: Increase maximum number of CPUs
|
||||
|
||||
In modern systems, the old limit (8) is too small, so increase
|
||||
to something larger (256).
|
||||
|
||||
Signed-off-by: Suwan Sun <swimlessbird@gmail.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
infiniband-diags/ibsysstat.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/infiniband-diags/ibsysstat.c b/infiniband-diags/ibsysstat.c
|
||||
index 6ff7ca0c4..73972d039 100644
|
||||
--- a/infiniband-diags/ibsysstat.c
|
||||
+++ b/infiniband-diags/ibsysstat.c
|
||||
@@ -41,7 +41,7 @@
|
||||
|
||||
#include "ibdiag_common.h"
|
||||
|
||||
-#define MAX_CPUS 8
|
||||
+#define MAX_CPUS 256
|
||||
|
||||
static struct ibmad_port *srcport;
|
||||
|
||||
Binary file not shown.
BIN
rdma-core-41.0.tar.gz
Normal file
BIN
rdma-core-41.0.tar.gz
Normal file
Binary file not shown.
@ -1,60 +1,11 @@
|
||||
Name: rdma-core
|
||||
Version: 35.1
|
||||
Release: 7
|
||||
Version: 41.0
|
||||
Release: 1
|
||||
Summary: RDMA core userspace libraries and daemons
|
||||
License: GPLv2 or BSD
|
||||
Url: https://github.com/linux-rdma/rdma-core
|
||||
Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
||||
|
||||
Patch0: backport-fixbug-increase-maximum-number-of-cpus-rdma.patch
|
||||
Patch1: 0001-Update-kernel-headers.patch
|
||||
Patch2: 0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch
|
||||
Patch3: 0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch
|
||||
Patch4: 0004-libhns-Remove-unsupported-QP-type.patch
|
||||
Patch5: 0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch
|
||||
Patch6: 0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch
|
||||
Patch7: 0007-libhns-Add-support-for-direct-wqe.patch
|
||||
Patch8: 0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch
|
||||
Patch9: 0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch
|
||||
Patch10: 0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch
|
||||
Patch11: 0011-libhns-Refactor-the-process-of-post_srq_recv.patch
|
||||
Patch12: 0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch
|
||||
Patch13: 0013-libhns-Refactor-the-process-of-create_srq.patch
|
||||
Patch14: 0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch
|
||||
Patch15: 0015-libhns-Refactor-process-of-setting-extended-sge.patch
|
||||
Patch16: 0016-libhns-Optimize-set_sge-process.patch
|
||||
Patch17: 0017-verbs-Add-generic-logging-API.patch
|
||||
Patch18: 0018-libhns-Use-the-verbs-logging-API-instead-of-printf-f.patch
|
||||
Patch19: 0019-libhns-The-function-declaration-should-be-the-same-a.patch
|
||||
Patch20: 0020-libhns-The-content-of-the-header-file-should-be-prot.patch
|
||||
Patch21: 0021-libhns-Fix-wrong-type-of-variables-and-fields.patch
|
||||
Patch22: 0022-libhns-Fix-wrong-print-format-for-unsigned-type.patch
|
||||
Patch23: 0023-libhns-Remove-redundant-variable-initialization.patch
|
||||
Patch24: 0024-libhns-Remove-unused-macros.patch
|
||||
Patch25: 0025-libhns-Refactor-the-poll-one-interface.patch
|
||||
Patch26: 0026-libhns-hr-ilog32-should-be-represented-by-a-function.patch
|
||||
Patch27: 0027-libhns-Fix-the-size-setting-error-when-copying-CQE-i.patch
|
||||
Patch28: 0028-libhns-Fix-the-problem-that-XRC-does-not-need-to-cre.patch
|
||||
Patch29: 0029-libhns-Add-vendor_err-information-for-error-WC.patch
|
||||
Patch30: 0030-libhns-Forcibly-rewrite-the-inline-flag-of-WQE.patch
|
||||
Patch31: 0031-libhns-Forcibly-rewrite-the-strong-order-flag-of-WQE.patch
|
||||
Patch32: 0032-util-Fix-mmio-memcpy-on-ARM.patch
|
||||
Patch33: 0033-libhns-Use-new-interfaces-hr-reg-to-operate-the-WQE-.patch
|
||||
Patch34: 0034-libhns-Use-new-interfaces-hr-reg-to-operate-the-DB-f.patch
|
||||
Patch35: 0035-libhns-Add-new-interfaces-hr-reg-to-operate-the-CQE-.patch
|
||||
Patch36: 0036-libhns-Fix-the-calculation-of-QP-SRQ-table-size.patch
|
||||
Patch37: 0037-libhns-Fix-wrong-HIP08-version-macro.patch
|
||||
Patch38: 0038-libhns-Fix-out-of-bounds-write-when-filling-inline-d.patch
|
||||
Patch39: 0039-libhns-Clear-remaining-unused-sges-when-post-recv.patch
|
||||
Patch40: 0040-libhns-Add-support-for-creating-extended-CQ.patch
|
||||
Patch41: 0041-libhns-Extended-CQ-supports-the-new-polling-mechanis.patch
|
||||
Patch42: 0042-libhns-Optimize-the-error-handling-of-CQE.patch
|
||||
Patch43: 0043-libhns-Refactor-hns-roce-v2-poll-one-and-wc-poll-cqe.patch
|
||||
Patch44: 0044-libhns-Extended-QP-supports-the-new-post-send-mechan.patch
|
||||
Patch45: 0045-libhns-Add-general-error-type-for-CQE.patch
|
||||
Patch46: 0046-libhns-Fix-the-shift-size-of-SQ-WQE.patch
|
||||
Patch47: 0047-libhns-Remove-support-for-HIP06.patch
|
||||
|
||||
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
|
||||
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
|
||||
BuildRequires: python3-devel python3-Cython python3 python3-docutils perl-generators
|
||||
@ -298,6 +249,12 @@ fi
|
||||
%{_mandir}/*
|
||||
|
||||
%changelog
|
||||
* Tue Sep 27 2022 tangchengchang <tangchengchang@huawei.com> - 41.0-1
|
||||
- Type: requirement
|
||||
- ID: NA
|
||||
- SUG: NA
|
||||
- DESC: update to 41.0
|
||||
|
||||
* Tue Sep 06 2022 luozhengfeng <luozhengfeng@h-partners.com> - 35.1-7
|
||||
- Type: bugfix
|
||||
- ID: NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user