2026-01-29 22:25:33 +08:00

567 lines
16 KiB
C

// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2024 Huawei Technologies Co., Ltd
#include "roce_event.h"
#include "roce_event_extension.h"
#include "roce_pub_cmd.h"
#include "hinic3_mt.h"
#ifdef ROCE_BONDING_EN
#include "roce_bond.h"
#endif
static void roce3_event_report_common_a(const struct roce3_device *rdev, int event_str_index)
{
switch (event_str_index) {
case OFED_ET_PATH_MIG:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Path mig. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case OFED_ET_COMM_EST:
/*lint -e160 -e522*/
roce3_pr_err_once("[ROCE] %s: [ofed event type] Communication establish. function id: %u\n",
__func__, rdev->glb_func_id);
/*lint +e160 +e522*/
break;
case OFED_ET_SQ_DRAINED:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Sq drained. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case OFED_ET_SRQ_QP_LAST_WQE:
/*lint -e160 -e522*/
roce3_pr_err_once("[ROCE] %s: [ofed event type] Srq/qp last wqe. function id: %u\n",
__func__, rdev->glb_func_id);
/*lint +e160 +e522*/
break;
case OFED_ET_WQ_CATAS_ERR:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Wq catas error. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case OFED_ET_PATH_MIG_FAILED:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Path mig failed. function id: %u\n",
__func__, rdev->glb_func_id);
break;
default:
break;
}
}
static void roce3_event_report_common_b(const struct roce3_device *rdev, int event_str_index)
{
switch (event_str_index) {
case OFED_ET_WQ_INVAL_REQ_ERR:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Wq inval req error. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case OFED_ET_WQ_ACCESS_ERR:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Wq access error. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case OFED_ET_CQ_ERR:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Cq error. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case OFED_ET_SRQ_LIMIT:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Srq limit. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case OFED_ET_SRQ_CATAS_ERR:
pr_err_ratelimited("[ROCE] %s: [ofed event type] Srq catas error. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case NON_OFED_ET_QPC_LOOKUP_ERR:
pr_err_ratelimited("[ROCE] %s: [non ofed event type] Qpc lookup error. function id: %u\n",
__func__, rdev->glb_func_id);
break;
case NON_OFED_ET_OTHER_TYPE_ERR:
/*lint -e160 -e522*/
roce3_pr_err_once("[ROCE] %s: [non ofed event type] other type error. function id: %u\n",
__func__, rdev->glb_func_id);
/*lint +e160 +e522*/
break;
default:
break;
}
}
static void roce3_event_report_common(const struct roce3_device *rdev, int event_str_index)
{
if (event_str_index <= OFED_ET_PATH_MIG_FAILED)
roce3_event_report_common_a(rdev, event_str_index);
else
roce3_event_report_common_b(rdev, event_str_index);
}
/*
****************************************************************************
Prototype : roce3_event_report
Description : roce3_event_report
Input : struct roce3_device *rdev
int event_str_index
Output : None
1.Date : 2017/5/4
Modification : Created function
****************************************************************************
*/
static void roce3_event_report(struct roce3_device *rdev, int event_str_index)
{
if (event_str_index <= NON_OFED_ET_OTHER_TYPE_ERR)
roce3_event_report_common(rdev, event_str_index);
else
roce3_event_report_extend(rdev, event_str_index);
}
/*
****************************************************************************
Prototype : to_qp_event_str_index
Description : to_qp_event_str_index
Input : u8 event_type
Output : None
1.Date : 2017/5/4
Modification : Created function
****************************************************************************
*/
static int to_qp_event_str_index(u8 event_type)
{
switch (event_type) {
case ROCE_EVENT_TYPE_PATH_MIG:
return OFED_ET_PATH_MIG;
case ROCE_EVENT_TYPE_COMM_EST:
return OFED_ET_COMM_EST;
case ROCE_EVENT_TYPE_SQ_DRAINED:
return OFED_ET_SQ_DRAINED;
case ROCE_EVENT_TYPE_SRQ_QP_LAST_WQE:
return OFED_ET_SRQ_QP_LAST_WQE;
case ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
return OFED_ET_WQ_CATAS_ERR;
case ROCE_EVENT_TYPE_PATH_MIG_FAILED:
return OFED_ET_PATH_MIG_FAILED;
case ROCE_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
return OFED_ET_WQ_INVAL_REQ_ERR;
default:
return OFED_ET_WQ_ACCESS_ERR;
}
}
int to_unknown_event_str_index(u8 event_type, const u8 *val)
{
switch (event_type) {
case ROCE_EVENT_TYPE_OFED_NO_DEF:
if (*((u64 *)val) == QPC_LOOKUP_ERR_VALUE)
return NON_OFED_ET_QPC_LOOKUP_ERR;
return NON_OFED_ET_OTHER_TYPE_ERR;
case ROCE_EVENT_TYPE_LOCAL_CATAS_ERROR:
return NON_OFED_ET_OTHER_TYPE_ERR;
default:
return INVAL_ET_ERR;
}
}
/*
****************************************************************************
Prototype : roce3_handle_qp_async_event
Description : roce3_handle_qp_async_event
Input : struct roce3_device *rdev
u32 qpn
u8 event_type
Output : None
1.Date : 2017/5/4
Modification : Created function
****************************************************************************
*/
static void roce3_handle_qp_async_event(struct roce3_device *rdev, u32 qpn, u8 event_type)
{
struct roce3_qp *rqp = NULL;
struct tag_cqm_object *cqm_obj_queue = NULL;
cqm_obj_queue = cqm_object_get(rdev->hwdev, CQM_OBJECT_SERVICE_CTX, qpn, false);
if (cqm_obj_queue == NULL) {
dev_err_ratelimited(rdev->hwdev_hdl,
"[ROCE] %s: Failed to get cqm obj queue, func_id(%d), qpn:%u, event_type:%u\n",
__func__, rdev->glb_func_id, qpn, event_type);
return;
}
rqp = cqmobj_to_roce_qp(cqm_obj_queue);
roce3_qp_async_event(rdev, rqp, (int)event_type);
hiudk_cqm_object_put(rdev->hwdev, cqm_obj_queue);
}
/*
****************************************************************************
Prototype : roce3_handle_cq_async_event
Description : roce3_handle_cq_async_event
Input : struct roce3_device *rdev
u32 cqn
u8 event_type
Output : None
1.Date : 2017/5/4
Modification : Created function
****************************************************************************
*/
static void roce3_handle_cq_async_event(struct roce3_device *rdev, u32 cqn, u8 event_type)
{
struct roce3_cq *rcq = NULL;
struct tag_cqm_object *cqm_obj_queue = NULL;
cqm_obj_queue = cqm_object_get(rdev->hwdev, CQM_OBJECT_RDMA_SCQ, cqn, false);
if (cqm_obj_queue == NULL) {
dev_err_ratelimited(rdev->hwdev_hdl,
"[ROCE] %s: Failed to get cqm obj queue, func_id(%d), cqn:%u, event_type:%u\n",
__func__, rdev->glb_func_id, cqn, event_type);
return;
}
rcq = cqmobj_to_roce3_cq(cqm_obj_queue);
roce3_cq_async_event(rdev, rcq, (int)event_type);
hiudk_cqm_object_put(rdev->hwdev, cqm_obj_queue);
}
/*
****************************************************************************
Prototype : roce3_handle_srq_async_event
Description : roce3_handle_srq_async_event
Input : struct roce3_device *rdev
u32 srqn
u8 event_type
Output : None
1.Date : 2017/5/4
Modification : Created function
****************************************************************************
*/
static void roce3_handle_srq_async_event(struct roce3_device *rdev, u32 srqn, u8 event_type)
{
struct roce3_srq *rsrq = NULL;
struct tag_cqm_object *cqm_obj_queue = NULL;
cqm_obj_queue = cqm_object_get(rdev->hwdev, CQM_OBJECT_RDMA_SRQ, srqn, false);
if (cqm_obj_queue == NULL) {
dev_err_ratelimited(rdev->hwdev_hdl,
"[ROCE] %s: Failed to get cqm obj queue, func_id(%d), srqn:%u, event_type:%u\n",
__func__, rdev->glb_func_id, srqn, event_type);
return;
}
rsrq = cqmobj_to_roce3_srq(cqm_obj_queue);
roce3_srq_async_event(rdev, rsrq, (int)event_type);
hiudk_cqm_object_put(rdev->hwdev, cqm_obj_queue);
}
static bool roce3_async_event_need_reset(u8 event_status)
{
if ((event_status == API_FORMAT_ERROR) || (event_status == UNKNOWN_RESERVED_ERROR1))
return true;
return false;
}
u8 roce3_async_event_level(void *svc_hd, u8 event_type, u8 *val)
{
u8 event_level = FAULT_LEVEL_MAX;
u8 event_status;
u64 param = *((u64 *)val);
if (svc_hd == NULL) {
pr_err("[ROCE, ERR] %s: Svc_hd is null\n", __func__);
return FAULT_LEVEL_SUGGESTION;
}
event_status = param & 0xff;
switch (event_type) {
case ROCE_EVENT_TYPE_PATH_MIG:
case ROCE_EVENT_TYPE_COMM_EST:
case ROCE_EVENT_TYPE_SQ_DRAINED:
case ROCE_EVENT_TYPE_SRQ_QP_LAST_WQE:
case ROCE_EVENT_TYPE_SRQ_LIMIT:
case ROCE_EVENT_TYPE_PORT_MNG_CHG_EVENT:
case ROCE_EVENT_TYPE_PORT_CHANGE:
case ROCE_EVENT_TYPE_ECC_DETECT:
case ROCE_EVENT_TYPE_CMD:
case ROCE_EVENT_TYPE_COMM_CHANNEL:
case ROCE_EVENT_TYPE_OP_REQUIRED:
event_level = FAULT_LEVEL_SUGGESTION;
break;
case ROCE_EVENT_TYPE_LOCAL_CATAS_ERROR:
case ROCE_EVENT_TYPE_WQ_INVAL_REQ_ERROR: // ROCE_AEQE_EVENT_QP_REQ_ERR
if (event_status == API_FORMAT_ERROR)
return FAULT_LEVEL_SERIOUS_RESET;
break;
case ROCE_EVENT_TYPE_WQ_CATAS_ERROR: // ROCE_AEQE_EVENT_WQE_CATAS
if (roce3_async_event_need_reset(event_status))
return FAULT_LEVEL_SERIOUS_RESET;
break;
case ROCE_EVENT_TYPE_CQ_ERROR: // CQ ERR
if (roce3_async_event_need_reset(event_status))
return FAULT_LEVEL_SERIOUS_RESET;
break;
default:
event_level = FAULT_LEVEL_GENERAL;
break;
}
return event_level;
}
static int roce3_async_event_handle_common(u8 event_type, u8 *val, struct roce3_device *rdev)
{
u32 xid = 0;
int event_str_index = 0;
switch (event_type) {
case ROCE_EVENT_TYPE_PATH_MIG:
case ROCE_EVENT_TYPE_COMM_EST:
case ROCE_EVENT_TYPE_SQ_DRAINED:
case ROCE_EVENT_TYPE_SRQ_QP_LAST_WQE:
case ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case ROCE_EVENT_TYPE_PATH_MIG_FAILED:
case ROCE_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case ROCE_EVENT_TYPE_WQ_ACCESS_ERROR:
xid = *((u32 *)val);
roce3_handle_qp_async_event(rdev, xid, event_type);
event_str_index = to_qp_event_str_index(event_type);
break;
case ROCE_EVENT_TYPE_CQ_ERROR:
xid = *((u32 *)val);
roce3_handle_cq_async_event(rdev, xid, event_type);
event_str_index = OFED_ET_CQ_ERR;
break;
case ROCE_EVENT_TYPE_SRQ_LIMIT:
xid = *((u32 *)val);
roce3_handle_srq_async_event(rdev, xid, event_type);
event_str_index = OFED_ET_SRQ_LIMIT;
break;
case ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
xid = *((u32 *)val);
roce3_handle_srq_async_event(rdev, xid, event_type);
event_str_index = OFED_ET_SRQ_CATAS_ERR;
break;
default:
event_str_index = to_unknown_event_str_index(event_type, val);
break;
}
return event_str_index;
}
/*
****************************************************************************
Prototype : roce3_async_event
Description : roce3_async_event
Input : void *svc_hd
u8 event_type
u64 val
Output : None
1.Date : 2015/5/27
Modification : Created function
****************************************************************************
*/
void roce3_async_event(void *svc_hd, u8 event_type, u8 *val)
{
int event_str_index = 0;
struct roce3_device *rdev = NULL;
if (svc_hd == NULL) {
pr_err_ratelimited("[ROCE, ERR] %s: Svc_hd is null\n", __func__);
return;
}
rdev = (struct roce3_device *)svc_hd;
if (event_type <= ROCE_EVENT_TYPE_ODP_PAGE_FAULT)
event_str_index = roce3_async_event_handle_common(event_type, val, rdev);
else
event_str_index = roce3_async_event_handle_extend(event_type, val, rdev);
roce3_event_report(rdev, event_str_index);
}
static void roce3_handle_sq_hot_plug(struct roce3_qp *rqp, unsigned long flags_qp,
struct list_head *cq_notify_list)
{
struct roce3_cq *send_rcq = NULL;
unsigned long flags_cq = 0;
unsigned long flags_qp_tmp = flags_qp;
spin_lock_irqsave(&rqp->sq.lock, flags_qp_tmp);
if (rqp->sq.tail == rqp->sq.head)
goto roce_sq_hot_plug_end;
send_rcq = to_roce3_cq(rqp->ibqp.send_cq);
spin_lock_irqsave(&send_rcq->lock, flags_cq);
if (send_rcq->reset_flow_comp && rqp->ibqp.send_cq->comp_handler) {
if (send_rcq->reset_notify_added == 0) {
send_rcq->reset_notify_added = 1;
list_add_tail(&send_rcq->reset_notify, cq_notify_list);
}
}
spin_unlock_irqrestore(&send_rcq->lock, flags_cq);
roce_sq_hot_plug_end:
spin_unlock_irqrestore(&rqp->sq.lock, flags_qp_tmp);
}
static void roce3_handle_rq_hot_plug(struct roce3_qp *rqp, unsigned long flags_qp,
struct list_head *cq_notify_list)
{
struct roce3_cq *recv_rcq = NULL;
unsigned long flags_cq = 0;
unsigned long flags_qp_tmp = flags_qp;
spin_lock_irqsave(&rqp->rq.lock, flags_qp_tmp);
/* no handling is needed for SRQ */
if (rqp->ibqp.srq != NULL)
goto roce_rq_hot_plug_end;
else if (rqp->rq.tail == rqp->rq.head)
goto roce_rq_hot_plug_end;
recv_rcq = to_roce3_cq(rqp->ibqp.recv_cq);
spin_lock_irqsave(&recv_rcq->lock, flags_cq);
if (recv_rcq->reset_flow_comp && rqp->ibqp.recv_cq->comp_handler) {
if (recv_rcq->reset_notify_added == 0) {
recv_rcq->reset_notify_added = 1;
list_add_tail(&recv_rcq->reset_notify, cq_notify_list);
}
}
spin_unlock_irqrestore(&recv_rcq->lock, flags_cq);
roce_rq_hot_plug_end:
spin_unlock_irqrestore(&rqp->rq.lock, flags_qp_tmp);
}
void roce3_handle_hotplug_arm_cq(struct roce3_device *rdev)
{
struct roce3_qp *rqp = NULL;
struct roce3_cq *rcq = NULL;
unsigned long flags_qp = 0;
unsigned long flags = 0;
struct list_head cq_notify_list;
pr_err("[ROCE, ERR] %s: Hotplug arm cq start:\n", __func__);
INIT_LIST_HEAD(&cq_notify_list);
/* Go over qp list reside on that rdev, sync with create/destroy qp */
spin_lock_irqsave(&rdev->reset_flow_resource_lock, flags);
list_for_each_entry(rqp, &rdev->qp_list, qps_list) {
roce3_handle_sq_hot_plug(rqp, flags_qp, &cq_notify_list);
roce3_handle_rq_hot_plug(rqp, flags_qp, &cq_notify_list);
}
list_for_each_entry(rcq, &cq_notify_list, reset_notify) {
rcq->reset_flow_comp(rcq);
}
spin_unlock_irqrestore(&rdev->reset_flow_resource_lock, flags);
pr_err("[ROCE, ERR] %s: Hotplug arm cq end!\n", __func__);
}
void roce3_kernel_hotplug_event_trigger(struct roce3_device *rdev)
{
int carrier_ok = 0;
if (rdev->ndev) {
carrier_ok = netif_carrier_ok(rdev->ndev);
if (carrier_ok != 0) {
dev_info(rdev->hwdev_hdl, "[ROCE] %s: Turn off, func_id(%u), name(%s), pci(%s),\n",
__func__, rdev->glb_func_id,
rdev->ib_dev.name, pci_name(rdev->pdev));
netif_carrier_off(rdev->ndev);
}
ROCE_MDELAY(ROCE_M_DELAY);
return;
}
dev_info(rdev->hwdev_hdl, "[ROCE] %s: rdev->ndev is NULL\n", __func__);
}
#ifdef ROCE_BONDING_EN
void roce3_handle_bonded_port_state_event(struct roce3_device *rdev)
{
struct net_device *master = netdev_master_upper_dev_get_rcu(rdev->ndev);
enum ib_port_state bonded_port_state = IB_PORT_NOP;
struct roce3_bond_device *bond_dev = NULL;
enum ib_port_state curr_port_state;
enum ib_event_type event;
struct roce3_bond_slave *slave = NULL;
int i;
if (master == NULL)
return;
bond_dev = rdev->bond_dev;
if (bond_dev == NULL) {
pr_err("[ROCE, ERR] %s: No bond_dev found\n", __func__);
return;
}
if (netif_running(master) != 0) {
mutex_lock(&bond_dev->slave_lock);
for (i = 0; i < bond_dev->slave_cnt; i++) {
slave = &bond_dev->slaves[i];
if (slave->netdev == NULL)
continue;
curr_port_state = (netif_running(slave->netdev) &&
netif_carrier_ok(slave->netdev)) ?
IB_PORT_ACTIVE :
IB_PORT_DOWN;
bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
curr_port_state : IB_PORT_ACTIVE;
}
mutex_unlock(&bond_dev->slave_lock);
} else {
bonded_port_state = IB_PORT_DOWN;
}
if (rdev->port_state != bonded_port_state) {
event = (bonded_port_state == IB_PORT_ACTIVE) ?
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
if (roce3_ifconfig_up_down_event_report(rdev, event) != 0)
return;
rdev->port_state = bonded_port_state;
}
}
#endif