2026-01-29 22:25:33 +08:00

1195 lines
33 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2024 Huawei Technologies Co., Ltd
#include <rdma/ib_mad.h>
#include <rdma/ib_verbs.h>
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include "hinic3_crm.h"
#include "hinic3_srv_nic.h"
#include "roce.h"
#include "roce_compat.h"
#include "roce_user.h"
#include "roce_pd.h"
#include "roce_qp.h"
#include "roce_cmd.h"
#include "roce_netdev.h"
#include "roce_main_extension.h"
#include "roce_pub_cmd.h"
#include "roce_mix.h"
#ifdef ROCE_BONDING_EN
#include "roce_bond.h"
#endif
/*
 * roce3_ib_get_netdev - look up the net_device behind an IB device.
 * @ibdev: IB device; a NULL pointer is rejected.
 * @port_num: port number (not consulted by this implementation).
 *
 * When ROCE_BONDING_EN is defined, a non-NULL result from
 * roce3_bond_get_netdev() is returned directly. Otherwise rdev->ndev is
 * returned after taking a reference with dev_hold().
 *
 * Return: the net_device, or NULL when @ibdev is NULL or the HCA is absent.
 */
struct net_device *roce3_ib_get_netdev(struct ib_device *ibdev, u32 port_num)
{
	struct roce3_device *rdev;
	struct net_device *ndev;

	if (!ibdev) {
		pr_err("[ROCE] %s: Ibdev is null\n", __func__);
		return NULL;
	}

	rdev = to_roce3_dev(ibdev);
	if (!roce3_hca_is_present(rdev)) {
		pr_err("[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
		       __func__, rdev->glb_func_id);
		return NULL;
	}

#ifdef ROCE_BONDING_EN
	/* Prefer the bonding device when one is reported. */
	ndev = roce3_bond_get_netdev(rdev);
	if (ndev)
		return ndev;
#endif

	ndev = rdev->ndev;
	dev_hold(ndev);

	return ndev;
}
/*
 * roce3_parse_fw_version - pack the dotted microcode version into a u64.
 * @rdev: device whose hwdev is queried for the firmware version.
 * @fw_ver: output; receives the packed version, or ROCE_FW_VER when the
 *          version cannot be fetched or a field is not a decimal number.
 *
 * The microcode version string is split on '.' (in place) and at most
 * ROCE_FW_VERSION_LEN fields are parsed as base-10 numbers.
 */
static void roce3_parse_fw_version(struct roce3_device *rdev, u64 *fw_ver)
{
	struct hinic3_fw_version ver_info;
	u64 fields[ROCE_FW_VERSION_LEN] = {0};
	char *cursor = (char *)ver_info.microcode_ver;
	char *token = NULL;
	int idx;
	int err;

	err = hinic3_get_fw_version(rdev->hwdev, &ver_info, HINIC3_CHANNEL_ROCE);
	if (err != 0) {
		pr_warn("[ROCE] %s: get fw version failed\n", __func__);
		*fw_ver = ROCE_FW_VER;
		return;
	}

	pr_info("[ROCE] %s: fw ver:%s - %s - %s\n", __func__, ver_info.boot_ver,
		ver_info.mgmt_ver, ver_info.microcode_ver);

	/* strsep() is evaluated before the bound check, as in a plain while loop. */
	for (idx = 0;
	     ((token = strsep(&cursor, ".")) != NULL) && (idx < ROCE_FW_VERSION_LEN);
	     idx++) {
		err = kstrtou64(token, 10, &fields[idx]);
		if (err != 0) {
			pr_warn("[ROCE] %s: parse fw version failed\n", __func__);
			*fw_ver = ROCE_FW_VER;
			return;
		}
	}

	/*
	 * Layout of the packed value:
	 *   field 0 -> bits 63..32
	 *   field 1 -> bits 31..16
	 *   field 2 -> bits 15..8
	 *   field 3 -> bits 7..0
	 */
	*fw_ver = ((fields[0] & 0xffffffff) << 32) |
		  ((fields[1] & 0xffff) << 16) |
		  ((fields[2] & 0xff) << 8) |
		  (fields[3] & 0xff);
}
/*
 * Advertise IB_DEVICE_MEM_MGT_EXTENSIONS only when local invalidate,
 * remote invalidate and fast-register work requests are all flagged in
 * @rdma_cap; otherwise @props is left untouched.
 */
static void roce3_set_local_cap_flag(const struct rdma_service_cap *rdma_cap,
				     struct ib_device_attr *props)
{
	if ((rdma_cap->flags & RDMA_BMME_FLAG_LOCAL_INV) == 0)
		return;
	if ((rdma_cap->flags & RDMA_BMME_FLAG_REMOTE_INV) == 0)
		return;
	if ((rdma_cap->flags & RDMA_BMME_FLAG_FAST_REG_WR) == 0)
		return;

	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
}
/*
 * When type-2 memory windows are flagged in @rdma_cap, advertise either
 * the 2B or the 2A variant in @props depending on
 * RDMA_BMME_FLAG_WIN_TYPE_2B. Without the type-2 flag nothing is changed.
 */
static void roce3_set_bmme_cap_flag(const struct rdma_service_cap *rdma_cap,
				    struct ib_device_attr *props)
{
	if ((rdma_cap->flags & RDMA_BMME_FLAG_TYPE_2_WIN) == 0)
		return;

	if ((rdma_cap->flags & RDMA_BMME_FLAG_WIN_TYPE_2B) != 0)
		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
	else
		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
}
/*
 * roce3_query_device_props_set - fill identification fields and limits.
 * @rdev: device supplying PCI ids, node GUID, board info and hwdev.
 * @rdma_cap: capability table the limits are derived from.
 * @props: attribute structure being filled.
 *
 * Most limits are "maximum minus reserved". On boards reporting
 * ROCE_PORT_NUM_2 ports at ROCE_25G_PORT_SPEED the MR and SRQ limits are
 * additionally divided by MEND_CAP_DEVIDE.
 */
static void roce3_query_device_props_set(struct roce3_device *rdev,
	struct rdma_service_cap *rdma_cap, struct ib_device_attr *props)
{
	int mr_cnt = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_mpts -
			   rdma_cap->reserved_mrws);
	int srq_cnt = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_srqs -
			    rdma_cap->dev_rdma_cap.roce_own_cap.reserved_srqs);

	/* Identification */
	props->vendor_id = rdev->pdev->vendor;
	props->vendor_part_id = rdev->pdev->device;
	roce3_parse_fw_version(rdev, &props->fw_ver);
	props->hw_ver = ROCE_HW_VER;
	/* The system image GUID is reported as the node GUID. */
	props->sys_image_guid = rdev->ib_dev.node_guid;

	/* Memory */
	props->max_mr_size = ~0ULL;
	props->page_size_cap = rdma_cap->page_size_cap;

	/* Queue pairs */
	props->max_qp = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_qps -
			      rdma_cap->reserved_qps);
	props->max_qp_wr = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_wqes;

	/*
	 * 4.19 ofed returns the smaller of the sq/rq sge counts to user space.
	 * On 4.17 max_sge only represented the max sq sge count and
	 * max_rq_sge was a fixed macro of 16.
	 */
	props->max_send_sge = rdma_cap->max_sq_sg;
	props->max_recv_sge = rdma_cap->dev_rdma_cap.roce_own_cap.max_rq_sg;

	/* Completion queues */
	props->max_cq = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_cqs -
			      rdma_cap->reserved_cqs);
	props->max_cqe = (int)rdma_cap->max_cqes;

	if ((rdev->board_info.port_num == ROCE_PORT_NUM_2) &&
	    (rdev->board_info.port_speed == ROCE_25G_PORT_SPEED)) {
		/* 2 smf for 64B cache */
		mr_cnt /= MEND_CAP_DEVIDE;
		srq_cnt /= MEND_CAP_DEVIDE;
	}
	props->max_mr = mr_cnt;
	props->max_srq = srq_cnt;
	props->max_mw = props->max_mr;

	props->max_pd = (int)(rdma_cap->num_pds - rdma_cap->reserved_pds);

	/* RDMA read / atomic resources */
	props->max_qp_rd_atom = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_qp_dest_rdma;
	props->max_qp_init_rd_atom = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_qp_init_rdma;
	props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;

	/* Shared receive queues */
	props->max_srq_wr = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_srq_wqes;
	props->max_srq_sge = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_srq_sge;

	props->max_fast_reg_page_list_len = rdma_cap->max_frpl_len;
	props->local_ca_ack_delay = (u8)rdma_cap->local_ca_ack_delay;

	if ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_ATOMIC) != 0)
		props->atomic_cap = IB_ATOMIC_HCA;
	else
		props->atomic_cap = IB_ATOMIC_NONE;
	props->masked_atomic_cap = props->atomic_cap;

	props->max_pkeys = (u16)rdma_cap->max_pkeys;
	props->max_ah = INT_MAX;
}
/*
****************************************************************************
Prototype : roce3_query_device
Description : query device attribute
Input : struct ib_device *ibdev
struct ib_device_attr *props
struct ib_udata *uhw
Output : struct ib_device_attr *props
1.Date : 2015/5/8
Modification : Created function
****************************************************************************
*/
int roce3_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw)
{
struct roce3_device *rdev = NULL;
struct rdma_service_cap *rdma_cap = NULL;
if ((ibdev == NULL) || (props == NULL)) {
pr_err("[ROCE] %s: Ibdev or props is null\n", __func__);
return -EINVAL;
}
rdev = to_roce3_dev(ibdev);
if (roce3_hca_is_present(rdev) == 0) {
dev_err(rdev->hwdev_hdl,
"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
__func__, rdev->glb_func_id);
return -EPERM;
}
memset(props, 0, sizeof(*props));
rdma_cap = &rdev->rdma_cap;
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_RC_RNR_NAK_GEN;
/* APM */
if ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_APM) != 0)
props->device_cap_flags = props->device_cap_flags | IB_DEVICE_AUTO_PATH_MIG;
/* rsvd_lKey */
if ((rdma_cap->flags & RDMA_BMME_FLAG_RESERVED_LKEY) != 0)
props->device_cap_flags = props->device_cap_flags | IB_DEVICE_LOCAL_DMA_LKEY;
roce3_set_local_cap_flag(rdma_cap, props);
#ifndef ROCE_COMPUTE
/* support XRC */
if ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_XRC) != 0)
props->device_cap_flags = props->device_cap_flags | IB_DEVICE_XRC;
#endif
/* support MW */
if ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_MEM_WINDOW) != 0)
props->device_cap_flags = props->device_cap_flags | IB_DEVICE_MEM_WINDOW;
roce3_set_bmme_cap_flag(rdma_cap, props);
roce3_query_device_props_set(rdev, rdma_cap, props);
return 0;
}
/*
 * Translate an Ethernet port speed into an IB width/speed pair in @props.
 * Speeds other than 10G/25G/40G/100G leave both fields at 0.
 */
static void eth_link_get_speed(struct ib_port_attr *props, enum mag_cmd_port_speed speed)
{
	/* Unknown speeds keep these defaults. */
	props->active_width = 0;
	props->active_speed = 0;

	switch (speed) {
	case PORT_SPEED_10GB:
		/* 10G = 1 lane x 10G */
		props->active_width = IB_WIDTH_1X;
		props->active_speed = IB_SPEED_QDR;
		break;
	case PORT_SPEED_25GB:
		/* 25G = 1 lane x 25G */
		props->active_width = IB_WIDTH_1X;
		props->active_speed = IB_SPEED_EDR;
		break;
	case PORT_SPEED_40GB:
		/* 40G = 4 lanes x 10G */
		props->active_width = IB_WIDTH_4X;
		props->active_speed = IB_SPEED_QDR;
		break;
	case PORT_SPEED_100GB:
		/* 100G = 4 lanes x 25G */
		props->active_width = IB_WIDTH_4X;
		props->active_speed = IB_SPEED_EDR;
		break;
	default:
		break;
	}
}
/*
 * Seed @props with the port attributes that do not depend on link state:
 * table sizes and message size come from rdev->rdma_cap, while state,
 * physical state and active MTU start at their "port down" defaults.
 */
static void roce3_set_ib_port_attr(struct ib_port_attr *props, struct roce3_device *rdev)
{
	/* Defaults for a port that is not (yet) known to be up. */
	props->state = IB_PORT_DOWN;
	props->phys_state = ROCE_PORT_PHYS_STATE_DISABLED;
	props->active_mtu = IB_MTU_256;
	props->max_mtu = IB_MTU_4096;

	/* Capability-derived values. */
	props->port_cap_flags = IB_PORT_CM_SUP;
	props->gid_tbl_len = (int)rdev->rdma_cap.max_gid_per_port;
	props->pkey_tbl_len = (u16)rdev->rdma_cap.max_pkeys;
	props->max_msg_sz = rdev->rdma_cap.dev_rdma_cap.roce_own_cap.max_msg_sz;
}
/*
 * eth_link_query_port - fill port attributes from the Ethernet link.
 * @ibdev: IB device.
 * @port: port number (not consulted; ROCE_DEFAULT_PORT_NUM is used for the
 *        netdev lookup).
 * @props: attributes to fill.
 *
 * Returns early, leaving @props partially filled, when the HCA is absent,
 * the speed query fails, or no net_device can be obtained.
 */
#ifdef OFED_MLNX_5_8
static void eth_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
#else
static void eth_link_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props)
#endif
{
	struct roce3_device *rdev = to_roce3_dev(ibdev);
	enum mag_cmd_port_speed link_speed = PORT_SPEED_10GB;
	struct net_device *ndev = NULL;
#ifdef ROCE_BONDING_EN
	struct net_device *master = NULL;
#endif
	enum ib_mtu dev_mtu;
	int err;

	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u), dev_name(%s).\n",
			__func__, rdev->glb_func_id, ibdev->name);
		return;
	}

	roce3_set_ib_port_attr(props, rdev);

	err = hinic3_get_speed(rdev->hwdev, &link_speed, HINIC3_CHANNEL_ROCE);
	if (err != 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to get speed, func_id(%d)\n",
			__func__, rdev->glb_func_id);
		props->active_width = 0;
		props->active_speed = 0;
		return;
	}
	eth_link_get_speed(props, link_speed);

	ndev = roce3_ib_get_netdev(ibdev, ROCE_DEFAULT_PORT_NUM);
	if (!ndev)
		return;

#ifdef ROCE_BONDING_EN
	if (roce3_bond_is_active(rdev)) {
		/* Swap our reference over to the bonding master, if there is one. */
		rcu_read_lock();
		master = netdev_master_upper_dev_get_rcu(ndev);
		if (master) {
			dev_put(ndev);
			ndev = master;
			dev_hold(ndev);
		}
		rcu_read_unlock();
	}
#endif

	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
		props->state = IB_PORT_ACTIVE;
		props->phys_state = ROCE_PORT_PHYS_STATE_LINKUP;
	}

	/* Read the MTU before the reference is dropped. */
	dev_mtu = (enum ib_mtu)iboe_get_mtu((int)ndev->mtu);
	dev_put(ndev);

	props->active_mtu = ROCE_MIN(props->max_mtu, dev_mtu);
}
/*
 * roce3_query_port - report port attributes.
 * @ibdev: IB device being queried.
 * @port: port number, forwarded to eth_link_query_port().
 * @props: output; zeroed and then filled by eth_link_query_port().
 *
 * Return: -EINVAL when @ibdev or @props is NULL, otherwise 0 (failures
 * inside eth_link_query_port() are not propagated).
 */
int roce3_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props)
{
	if (!ibdev || !props) {
		pr_err("[ROCE] %s: Ibdev or props is null\n", __func__);
		return -EINVAL;
	}

	memset(props, 0, sizeof(*props));
	eth_link_query_port(ibdev, port, props);

	return 0;
}
/*
 * roce3_query_gid - read one GID table entry.
 * @ibdev: IB device being queried.
 * @port: port number passed to roce3_rdma_get_gid().
 * @index: GID table index passed to roce3_rdma_get_gid().
 * @gid: output GID.
 *
 * Return: 0 on success, -EINVAL when @ibdev or @gid is NULL, -EPERM when
 * the HCA is not present, or the error returned by roce3_rdma_get_gid().
 */
int roce3_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid)
{
	struct rdma_gid_entry entry;
	struct roce3_device *rdev;
	int err;

	if (!ibdev || !gid) {
		pr_err("[ROCE] %s: Ibdev or gid is null\n", __func__);
		return -EINVAL;
	}

	rdev = to_roce3_dev(ibdev);
	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	err = roce3_rdma_get_gid(rdev->hwdev, (u32)port, (u32)index, &entry);
	if (err != 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to get gid, func_id(%d)\n",
			__func__, rdev->glb_func_id);
		return err;
	}

	memcpy((void *)gid->raw, (void *)entry.raw, sizeof(*gid));

	/*
	 * Convert the GID the way OFED generates it; only the IPv4 case
	 * needs a conversion.
	 */
	if (entry.dw6_h.bs.gid_type != ROCE_IPv4_ROCEv2_GID)
		return 0;

	/* A GID that was never added is returned as-is, without conversion. */
	if ((gid->global.subnet_prefix == 0) && (gid->global.interface_id == 0))
		return 0;

	ipv6_addr_set_v4mapped(((u32 *)(void *)gid)[ROCE_GID_IP_IDX],
			       (struct in6_addr *)gid);

	return 0;
}
/*
 * roce3_query_pkey - report a P_Key table entry.
 * @ibdev: IB device being queried.
 * @port: port number (not consulted).
 * @index: P_Key table index (not consulted).
 * @pkey: output; always set to 0xffff on success.
 *
 * Return: 0 on success, -EINVAL when @ibdev or @pkey is NULL, -EPERM when
 * the HCA is not present.
 */
int roce3_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
{
	struct roce3_device *rdev = NULL;

	/* Single NULL check for both pointers; no second check is needed. */
	if ((ibdev == NULL) || (pkey == NULL)) {
		pr_err("[ROCE] %s: Ibdev or pkey is null\n", __func__);
		return -EINVAL;
	}

	rdev = to_roce3_dev(ibdev);
	if (roce3_hca_is_present(rdev) == 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	/* Every index reports the same fixed P_Key value. */
	*pkey = 0xffff;

	return 0;
}
/*
 * roce3_modify_device - modify device attributes.
 * @ibdev: IB device to modify.
 * @mask: IB_DEVICE_MODIFY_* bits selecting what to change.
 * @props: new values; only node_desc is consumed.
 *
 * Only IB_DEVICE_MODIFY_NODE_DESC is handled: the node description is
 * copied into ibdev->node_desc under rdev->node_desc_lock.
 *
 * Return: 0 on success or when @mask requests nothing, -EINVAL when
 * @ibdev or @props is NULL, -EOPNOTSUPP when @mask contains any bit other
 * than IB_DEVICE_MODIFY_NODE_DESC, -EPERM when the HCA is not present.
 */
int roce3_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props)
{
	unsigned long flags = 0;
	struct roce3_device *rdev = NULL;

	if ((ibdev == NULL) || (props == NULL)) {
		pr_err("[ROCE] %s: Ibdev or props is null\n", __func__);
		return -EINVAL;
	}

	/* Reject any request that touches something other than the node description. */
	if (((unsigned int)mask & ~IB_DEVICE_MODIFY_NODE_DESC) != 0) {
		pr_err("[ROCE] %s: Only node description can be modified\n", __func__);
		return -EOPNOTSUPP;
	}

	if ((((u32)mask) & IB_DEVICE_MODIFY_NODE_DESC) == 0) {
		pr_info("[ROCE] %s: No need to modify node description\n", __func__);
		return 0;
	}

	rdev = to_roce3_dev(ibdev);
	if (roce3_hca_is_present(rdev) == 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	spin_lock_irqsave(&rdev->node_desc_lock, flags);
	memcpy((void *)ibdev->node_desc, (void *)props->node_desc, IB_DEVICE_NODE_DESC_MAX);
	spin_unlock_irqrestore(&rdev->node_desc_lock, flags);

	return 0;
}
/*
 * roce3_modify_port - modify port attributes.
 * @ibdev: IB device.
 * @port: port number, forwarded to roce3_query_port().
 * @mask: modification mask (not consulted).
 * @props: requested changes (not consulted).
 *
 * Nothing is actually modified: the port is queried into a local
 * structure while rdev->cap_mask_mutex is held and the query result is
 * returned.
 *
 * Return: -EINVAL when @ibdev is NULL, -EPERM when the HCA is not present,
 * otherwise the return value of roce3_query_port().
 */
int roce3_modify_port(struct ib_device *ibdev, u32 port, int mask, struct ib_port_modify *props)
{
	struct roce3_device *rdev;
	struct ib_port_attr attr;
	int ret;

	if (!ibdev) {
		pr_err("[ROCE] %s: Ibdev is null\n", __func__);
		return -EINVAL;
	}

	rdev = to_roce3_dev(ibdev);
	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	memset(&attr, 0, sizeof(attr));

	mutex_lock(&rdev->cap_mask_mutex);
	ret = roce3_query_port(ibdev, port, &attr);
	if (ret != 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to query port, func_id(%d)\n",
			__func__, rdev->glb_func_id);
	}
	mutex_unlock(&rdev->cap_mask_mutex);

	return ret;
}
/*
 * Populate the alloc-ucontext response with values copied from the
 * device capability table, configuration and hardware info, then let
 * roce3_resp_set_ext() add its own fields.
 */
static void roce3_alloc_ucontext_set(struct roce3_device *rdev,
				     struct roce3_alloc_ucontext_resp *resp)
{
	struct rdma_service_cap *cap = &rdev->rdma_cap;

	/* Values taken from the RDMA capability table. */
	resp->num_qps = cap->dev_rdma_cap.roce_own_cap.max_qps;
	resp->num_xsrqs = cap->dev_rdma_cap.roce_own_cap.max_srqs;
	resp->max_msg_size = cap->dev_rdma_cap.roce_own_cap.max_msg_sz;
	resp->max_inline_size = cap->dev_rdma_cap.roce_own_cap.max_sq_inline_data_sz;
	resp->cqe_size = cap->cqe_size;
	resp->wqebb_size = cap->wqebb_size;
	resp->dwqe_size = cap->direct_wqe_size;
	resp->max_comp_vector = cap->num_comp_vectors;

	/* Values taken from the device configuration and hardware info. */
	resp->storage_aa_en = roce3_is_roceaa(rdev->cfg_info.scence_id);
	resp->phy_port = rdev->hw_info.phy_port;
	resp->srq_container_en = rdev->cfg_info.srq_container_en;
	resp->srq_container_mode = rdev->cfg_info.srq_container_mode;
	resp->xrc_srq_container_mode = rdev->cfg_info.xrc_srq_container_mode;
	resp->warn_th = rdev->cfg_info.warn_th;

	roce3_resp_set_ext(rdev, resp);
}
/*
 * Validate the preconditions for allocating a user context.
 *
 * Return: 0 when allocation may proceed, -EINVAL when @ibdev or @udata is
 * NULL, -EPERM when the HCA is not present, -EAGAIN when rdev->ib_active
 * is not set.
 */
static int roce3_alloc_ucontext_pre_check(struct ib_device *ibdev, const struct ib_udata *udata)
{
	struct roce3_device *rdev;

	if (!ibdev || !udata) {
		pr_err("[ROCE] %s: Ibdev or udata is null\n", __func__);
		return -EINVAL;
	}

	rdev = to_roce3_dev(ibdev);

	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	if (rdev->ib_active)
		return 0;

	dev_err(rdev->hwdev_hdl,
		"[ROCE, ERR] %s: Device is abnormal, func_id(%d)\n",
		__func__, rdev->glb_func_id);
	return -EAGAIN;
}
/*
 * Finish the alloc-ucontext response and copy it to user space.
 *
 * The in-page offsets of the doorbell and direct-WQE addresses are stored
 * in @resp (the direct-WQE size is cleared when no direct-WQE address was
 * allocated), the context's doorbell page list and mutex are initialised,
 * and @resp is copied out through @udata.
 *
 * Return: 0 on success or the error from ib_copy_to_udata_ext().
 */
static int roce3_alloc_ucontext_return(struct roce3_device *rdev, struct ib_udata *udata,
	struct roce3_ucontext *context, struct roce3_alloc_ucontext_resp *resp)
{
	const int in_page_mask = (1 << PAGE_SHIFT) - 1;
	int err;

	resp->db_offset = context->db_dma_addr & in_page_mask;
	resp->dwqe_offset = context->dwqe_dma_addr & in_page_mask;
	if (context->dwqe_dma_addr == 0)
		resp->dwqe_size = 0;

	roce3_ucontext_set_ext(rdev, context);

	INIT_LIST_HEAD(&context->db_page_list);
	mutex_init(&context->db_page_mutex);

	/* Hand the response to user space. */
	err = ib_copy_to_udata_ext(udata, resp);
	if (err != 0)
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to copy data to user space, func_id(%d)\n",
			__func__, rdev->glb_func_id);

	return err;
}
/*
 * roce3_alloc_ucontext - set up a user context.
 * @ibucontext: core user context embedded in the driver context.
 * @udata: user data used to locate the driver context and to return the
 *         response.
 *
 * Allocates a temporary response buffer, fills it, allocates the doorbell
 * and direct-WQE physical addresses and copies the response to user space.
 * The response buffer is always freed; the doorbell addresses are released
 * again when the copy to user space fails.
 *
 * Return: 0 on success, -ENOMEM when the response buffer cannot be
 * allocated, or the error of the failing step.
 */
int roce3_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
	struct roce3_device *rdev = to_roce3_dev(ibucontext->device);
	struct roce3_ucontext *context = rdma_udata_to_drv_context(
		udata, struct roce3_ucontext, ibucontext);
	struct roce3_alloc_ucontext_resp *resp;
	int ret;

	ret = roce3_alloc_ucontext_pre_check(ibucontext->device, udata);
	if (ret != 0)
		return ret;

	resp = roce3_resp_alloc_ext();
	if (!resp) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to alloc ucontext, func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -ENOMEM;
	}

	roce3_alloc_ucontext_set(rdev, resp);

	/* Doorbell and direct-WQE addresses for this user context. */
	ret = hinic3_alloc_db_phy_addr(rdev->hwdev, &context->db_dma_addr, &context->dwqe_dma_addr);
	if (ret != 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to alloc DB pa, ret(%d), func_id(%u)\n",
			__func__, ret, rdev->glb_func_id);
		goto free_resp;
	}

	/* Copy the response out; undo the doorbell allocation on failure. */
	ret = roce3_alloc_ucontext_return(rdev, udata, context, resp);
	if (ret != 0)
		hinic3_free_db_phy_addr(rdev->hwdev, context->db_dma_addr,
					context->dwqe_dma_addr);

free_resp:
	kfree(resp);
	return ret;
}
/*
 * roce3_dealloc_ucontext - release the doorbell and direct-WQE addresses
 * held by a user context. A NULL @ibcontext is logged and ignored.
 */
void roce3_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct roce3_ucontext *uctx;
	struct roce3_device *rdev;

	if (!ibcontext) {
		pr_err("[ROCE] %s: Ibcontext is null\n", __func__);
		return;
	}

	rdev = to_roce3_dev(ibcontext->device);
	uctx = to_roce3_ucontext(ibcontext);

	hinic3_free_db_phy_addr(rdev->hwdev, uctx->db_dma_addr, uctx->dwqe_dma_addr);
}
/*
 * roce3_mmap - map a doorbell or direct-WQE page into user space.
 * @ibcontext: user context owning the doorbell/direct-WQE addresses.
 * @vma: user mapping request; must cover exactly one page.
 *
 * vm_pgoff selects the target: USR_MMAP_DB_OFFSET maps the doorbell page
 * non-cached; USR_MMAP_DWQE_OFFSET (only when the device reports a
 * non-zero direct WQE size) maps the direct-WQE page, non-cached on
 * aarch64 and write-combined elsewhere. Everything else is delegated to
 * roce3_mmap_ext().
 *
 * Return: 0 on success, -EINVAL for NULL arguments or a mapping that is
 * not exactly PAGE_SIZE, -EPERM when the HCA is not present, -EAGAIN when
 * the remap fails, or the result of roce3_mmap_ext().
 */
int roce3_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct roce3_ucontext *uctx;
	struct roce3_device *rdev;
	unsigned long db_pfn;
	unsigned long dwqe_pfn;

	if (!ibcontext || !vma) {
		pr_err("[ROCE] %s: Ibcontext or vma is null\n", __func__);
		return -EINVAL;
	}

	rdev = to_roce3_dev(ibcontext->device);
	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	uctx = to_roce3_ucontext(ibcontext);
	db_pfn = uctx->db_dma_addr >> PAGE_SHIFT;
	dwqe_pfn = uctx->dwqe_dma_addr >> PAGE_SHIFT;

	if ((vma->vm_end - vma->vm_start) != PAGE_SIZE) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: (Vm_end - vm_start) is not equal to PAGE_SIZE, func_id(%d)\n",
			__func__, rdev->glb_func_id);
		return -EINVAL;
	}

	/* Doorbell page */
	if (vma->vm_pgoff == USR_MMAP_DB_OFFSET) {
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

		/*
		 * Build the page table for [vm_start, vm_start + PAGE_SIZE):
		 * db_pfn is the page frame number, vm_page_prot the attributes.
		 */
		if (io_remap_pfn_range(vma, vma->vm_start, db_pfn, PAGE_SIZE,
				       vma->vm_page_prot) == 0)
			return 0;

		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to do db io remap, func_id(%d)\n",
			__func__, rdev->glb_func_id);
		return -EAGAIN;
	}

	/* Direct-WQE page */
	if ((vma->vm_pgoff == USR_MMAP_DWQE_OFFSET) && (rdev->rdma_cap.direct_wqe_size != 0)) {
#ifdef __aarch64__
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
#else
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
#endif
		if (io_remap_pfn_range(vma, vma->vm_start, dwqe_pfn, PAGE_SIZE,
				       vma->vm_page_prot) == 0)
			return 0;

		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] %s: Failed to do dwqe io remap, func_id(%d)\n",
			__func__, rdev->glb_func_id);
		return -EAGAIN;
	}

	return roce3_mmap_ext(rdev, uctx, vma);
}
/*
 * roce3_port_link_layer - report the link layer of a port.
 *
 * Return: IB_LINK_LAYER_ETHERNET for ROCE_DEFAULT_PORT_NUM on a present
 * HCA; IB_LINK_LAYER_UNSPECIFIED for a NULL @ibdev, an absent HCA or any
 * other port number.
 */
enum rdma_link_layer roce3_port_link_layer(struct ib_device *ibdev, u32 port_num)
{
	struct roce3_device *rdev;

	if (!ibdev)
		return IB_LINK_LAYER_UNSPECIFIED;

	rdev = to_roce3_dev(ibdev);
	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return IB_LINK_LAYER_UNSPECIFIED;
	}

	if (port_num == ROCE_DEFAULT_PORT_NUM)
		return IB_LINK_LAYER_ETHERNET;

	return IB_LINK_LAYER_UNSPECIFIED;
}
/*
 * Address-resolution callback: record @status in the
 * roce3_resolve_cb_context passed as @context and signal its completion.
 * @src_addr and @addr are not used.
 */
static void roce3_resolve_cb(int status, struct sockaddr *src_addr,
			     struct rdma_dev_addr *addr, void *context)
{
	struct roce3_resolve_cb_context *cb_ctx = context;

	cb_ctx->status = status;
	complete(&cb_ctx->comp);
}
/*
 * roce3_rdma_addr_find_l2_eth_by_grh - resolve the destination MAC for a
 * source/destination GID pair.
 * @sgid: source GID.
 * @dgid: destination GID.
 * @dmac: output; receives ETH_ALEN bytes of the resolved destination
 *        address (all zero if nothing was resolved, since the local
 *        rdma_dev_addr starts zeroed).
 * @ndev: optional net_device used to bind the lookup; when NULL the lookup
 *        runs in init_net without a bound interface.
 * @hoplimit: optional output for the resolved hop limit.
 * @rdev: not used.
 *
 * A failure of rdma_resolve_ip() is logged and otherwise ignored: the
 * callback is invoked by hand so the wait below completes. The status
 * stored by the callback is not inspected.
 *
 * Return: always 0.
 */
static int roce3_rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
	u8 *dmac, const struct net_device *ndev, int *hoplimit, struct roce3_device *rdev)
{
	union {
		struct sockaddr _sockaddr;
		struct sockaddr_in _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} src_ip, dst_ip;
	struct roce3_resolve_cb_context cb_ctx;
	struct rdma_dev_addr resolved;
	int err;

	rdma_gid2ip(&src_ip._sockaddr, sgid);
	rdma_gid2ip(&dst_ip._sockaddr, dgid);

	memset(&resolved, 0, sizeof(resolved));
	resolved.net = &init_net;
	if (ndev) {
		resolved.bound_dev_if = ndev->ifindex;
		resolved.net = dev_net(ndev);
	}

	init_completion(&cb_ctx.comp);

	err = rdma_resolve_ip(&src_ip._sockaddr, &dst_ip._sockaddr, &resolved,
			      RESOLVE_IP_TIME_OUT, roce3_resolve_cb, false, &cb_ctx);
	if (err != 0) {
		pr_err("[ROCE] %s: rdma_resolve_ip failed. Igonore the err.\n", __func__);
		/* Complete by hand so the wait below does not block. */
		roce3_resolve_cb(0, &src_ip._sockaddr, &resolved, &cb_ctx);
	}

	wait_for_completion(&cb_ctx.comp);

	memcpy(dmac, resolved.dst_dev_addr, ETH_ALEN);
	if (hoplimit)
		*hoplimit = resolved.hoplimit;

	return 0;
}
/*
 * roce3_ah_valid_check - validate the destination GID of an AH.
 * @grh: global route whose dgid is checked.
 * @vlan_id: output; set to ROCE_DEFAULT_VLAN_ID for link-local dgids.
 * @dmac: output; for link-local dgids the MAC is derived from the dgid.
 *
 * The dgid is rejected when it equals the all-zero GID or the GID whose
 * only non-zero byte is a 1 at index ROCE_GID_HIGHEST_BYTE.
 *
 * Return: 0 when the dgid is acceptable, -EINVAL otherwise.
 */
static int roce3_ah_valid_check(struct ib_global_route *grh, u16 *vlan_id, u8 *dmac)
{
	u8 gid_all_zero[ROCE_GID_LEN] = { 0 };
	u8 gid_one[ROCE_GID_LEN] = { 0 };
	struct in6_addr *dgid_ip = (struct in6_addr *)grh->dgid.raw;

	gid_one[ROCE_GID_HIGHEST_BYTE] = 1;

	/* A unicast GID may be neither 0 nor 1. */
	if ((ROCE_MEMCMP(grh->dgid.raw, gid_all_zero, sizeof(union ib_gid)) == 0) ||
	    (ROCE_MEMCMP(grh->dgid.raw, gid_one, sizeof(union ib_gid)) == 0)) {
		pr_err("[ROCE] %s: Invalid unicast dgid\n", __func__);
		return (-EINVAL);
	}

	if (rdma_link_local_addr(dgid_ip) == 0)
		return 0;

	/* Link-local: the MAC is encoded in the GID itself. */
	rdma_get_ll_mac(dgid_ip, dmac);
	*vlan_id = ROCE_DEFAULT_VLAN_ID;

	return 0;
}
static int roce3_fill_gid_attr(struct roce3_device *rdev, struct rdma_ah_attr *ah_attr,
union ib_gid *sgid, const struct ib_gid_attr **sgid_attr)
{
int ret = 0;
ret = rdma_query_gid(&rdev->ib_dev, ah_attr->port_num, ah_attr->grh.sgid_index, sgid);
if (ret != 0) {
dev_err(rdev->hwdev_hdl, "[ROCE, ERR] : Failed to query gid func_id(%u),port_num(%d),gid_index(%d),ret(%d)\n",
rdev->glb_func_id, ah_attr->port_num, ah_attr->grh.sgid_index, ret);
return ret;
}
*sgid_attr = rdma_get_gid_attr(&rdev->ib_dev, ah_attr->port_num, ah_attr->grh.sgid_index);
if (IS_ERR_OR_NULL(*sgid_attr)) {
ret = (int)PTR_ERR(*sgid_attr);
dev_err(rdev->hwdev_hdl,
"[ROCE] : Failed to get sgid_attr, func_id(%u), ret(%d).\n",
rdev->glb_func_id, ret);
return ret;
}
return ret;
}
/* Release a GID attribute by handing @sgid_attr to rdma_put_gid_attr(). */
static void roce3_release_gid_ref_cnt(const struct ib_gid_attr *sgid_attr)
{
rdma_put_gid_attr(sgid_attr);
}
/*
 * roce3_fill_netdev - find the net_device that owns the source GID's
 * address.
 * @rdev: not used.
 * @sgid: source GID, converted to an IP address for the lookup.
 *
 * rdev->ndev is not valid in the VLAN scenario, so the device is looked
 * up by address in init_net instead. Only the IPv4 address field of the
 * converted GID is passed to ip_dev_find().
 *
 * NOTE(review): the reference taken by ip_dev_find() is dropped before the
 * pointer is returned, so the caller receives an unreferenced pointer -
 * confirm the device cannot go away while the caller uses it.
 *
 * Return: the matching net_device, or NULL when none is found.
 */
static struct net_device *roce3_fill_netdev(struct roce3_device *rdev, union ib_gid *sgid)
{
	union {
		struct sockaddr _sockaddr;
		struct sockaddr_in _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} ip;
	struct net_device *found;

	rdma_gid2ip(&ip._sockaddr, sgid);

	found = ip_dev_find(&init_net, ip._sockaddr_in.sin_addr.s_addr);
	if (!found)
		return NULL;

	dev_put(found);
	return found;
}
/*
 * roce3_resolve_grh - make sure an AH has a destination MAC and VLAN id.
 * @rdev: device the AH belongs to.
 * @ah_attr: AH attributes; ah_attr->roce.dmac may be filled in here.
 * @vlan_id: output VLAN id.
 * @udata: not used.
 *
 * If the dgid check leaves a non-zero dmac in place nothing more is done.
 * Otherwise the dmac is resolved from the source/destination GIDs and the
 * VLAN id is read from the source GID attribute's net_device.
 *
 * Return: 0 on success, -EINVAL for NULL arguments or when no valid dmac
 * can be found, or the error of the failing helper.
 */
int roce3_resolve_grh(struct roce3_device *rdev, struct rdma_ah_attr *ah_attr,
	u16 *vlan_id, struct ib_udata *udata)
{
	const struct ib_gid_attr *sgid_attr = NULL;
	struct net_device *src_ndev = NULL;
	u8 zero_mac[ETH_ALEN] = { 0 };
	union ib_gid sgid;
	u8 *dmac;
	int ret;

	if (!rdev || !ah_attr || !vlan_id) {
		pr_err("[ROCE, ERR] %s: Input pointer is NULL, rdev(%p), ah_attr(%p), vlan_id(%p).\n",
		       __func__, rdev, ah_attr, vlan_id);
		return (-EINVAL);
	}

	dmac = ah_attr->roce.dmac;

	ret = roce3_ah_valid_check(&ah_attr->grh, vlan_id, dmac);
	if (ret != 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] : Failed to check grh input, func_id(%u), ret(%d).\n",
			rdev->glb_func_id, ret);
		return ret;
	}

	/* A dmac is already present: nothing left to resolve. */
	if (ROCE_MEMCMP(dmac, zero_mac, ETH_ALEN) != 0)
		return 0;

	ret = roce3_fill_gid_attr(rdev, ah_attr, &sgid, &sgid_attr);
	if (ret != 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] : Failed to fill gid attr, func_id(%u), ret(%d)\n",
			rdev->glb_func_id, ret);
		return ret;
	}

	src_ndev = roce3_fill_netdev(rdev, &sgid);

	/* Re-resolve the dmac to avoid an invalid dmac from OFED. */
	ret = roce3_rdma_addr_find_l2_eth_by_grh(&sgid, &ah_attr->grh.dgid, dmac,
						 src_ndev, NULL, rdev);
	if (ret != 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] : Failed to find dmac by grh, func_id(%u)\n",
			rdev->glb_func_id);
	} else if (ROCE_MEMCMP(dmac, zero_mac, ETH_ALEN) == 0) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE, ERR] : Failed to find valid dmac, func_id(%u)\n",
			rdev->glb_func_id);
		ret = (-EINVAL);
	} else {
		*vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
	}

	/* The GID attribute reference is dropped on every path past this point. */
	roce3_release_gid_ref_cnt(sgid_attr);
	return ret;
}
/*
 * ah_get_vlan_id - read the VLAN id of the AH's source GID net_device.
 * @rdev: device, used for logging only.
 * @pd: not used.
 * @ah_attr: AH attributes; grh.sgid_attr->ndev is read under RCU.
 * @vlan_id: output VLAN id; written only on success.
 *
 * Return: 0 on success, -EINVAL when the GID attribute has no net_device.
 */
static int ah_get_vlan_id(struct roce3_device *rdev, struct ib_pd *pd,
			  struct rdma_ah_attr *ah_attr, u32 *vlan_id)
{
	struct net_device *gid_ndev;
	bool have_ndev;

	rcu_read_lock();
	gid_ndev = rcu_dereference(ah_attr->grh.sgid_attr->ndev);
	have_ndev = (gid_ndev != NULL);
	if (have_ndev)
		*vlan_id = rdma_vlan_dev_vlan_id(gid_ndev);
	rcu_read_unlock();

	if (have_ndev)
		return 0;

	dev_err(rdev->hwdev_hdl,
		"[ROCE, ERR] : Net device is NULL, func_id(%u)\n", rdev->glb_func_id);
	return -EINVAL;
}
/*
 * create_ib_ah - fill the driver-private address handle in rah->priv_ah.
 * @rdev: device; supplies glb_func_id and is passed to the CoS lookup.
 * @pd: protection domain whose pdn is stored in the handle.
 * @rah: driver AH whose priv_ah is written.
 * @ah_attr: source attributes (GRH, sl, port_num, roce.dmac).
 *
 * Each dword of priv_ah is filled through its bitfield view and then
 * converted in place with cpu_to_be32(), so all bitfield writes to a dword
 * must stay ahead of that dword's conversion.
 *
 * Return: 0 on success, -EFAULT when the VLAN id cannot be obtained.
 */
static int create_ib_ah(struct roce3_device *rdev, struct ib_pd *pd, struct roce3_ah *rah,
struct rdma_ah_attr *ah_attr)
{
int ret;
u8 *dmac = ah_attr->roce.dmac;
u32 vlan_id = 0;
ret = ah_get_vlan_id(rdev, pd, ah_attr, &vlan_id);
if (ret != 0) {
pr_err("[ROCE, ERR] %s: Failed to get vlan_id (ret:%d)\n", __func__, ret);
/* The helper's error code is logged but -EFAULT is what callers see. */
return -EFAULT;
}
/* GRH-derived fields are only filled when the AH carries a GRH. */
if (((u32)rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) != 0) {
memcpy((void *)rah->priv_ah.dgid, (void *)ah_attr->grh.dgid.raw,
sizeof(rah->priv_ah.dgid));
rah->priv_ah.dw2.bs.flow_label = ah_attr->grh.flow_label & 0xfffff;
rah->priv_ah.dw1.bs.sgid_index = ah_attr->grh.sgid_index & 0x7f;
rah->priv_ah.dw1.bs.hoplimit = ah_attr->grh.hop_limit;
/* NOTE(review): 0x2 is always ORed into the traffic class - meaning not visible here, confirm. */
rah->priv_ah.dw1.bs.tclass = (u8)(ah_attr->grh.traffic_class | 0x2);
}
/* dw0: protection domain number and WQE CoS, then byte-swap. */
rah->priv_ah.dw0.bs.pd = to_roce3_pd(pd)->pdn & 0x3ffff;
rah->priv_ah.dw0.bs.wqe_cos = roce3_get_db_cos_from_vlan_pri(rdev, ah_attr->sl);
rah->priv_ah.dw0.value = cpu_to_be32(rah->priv_ah.dw0.value);
rah->priv_ah.dw1.bs.port = ah_attr->port_num & 0xf;
rah->priv_ah.dw2.bs.smac_index = rdev->glb_func_id; /* set global Function ID */
rah->priv_ah.dw2.value = cpu_to_be32(rah->priv_ah.dw2.value);
rah->priv_ah.dw1.bs.resv = 0;
/* dw7: VLAN id/priority and the upper two bytes of the destination MAC. */
rah->priv_ah.dw7.bs.vlan_id = vlan_id & 0xfff;
rah->priv_ah.dw7.bs.vlan_pri = ah_attr->sl & 0x7;
rah->priv_ah.dw1.value = cpu_to_be32(rah->priv_ah.dw1.value);
rah->priv_ah.dw7.bs.dmac_h16 = (dmac[0] << ROCE_RAH_DMAC_H16_SHIFT) | dmac[1];
rah->priv_ah.dw7.value = cpu_to_be32(rah->priv_ah.dw7.value);
/* Remaining MAC bytes are copied verbatim, starting at ROCE_RAH_DMAC_L32_START. */
memcpy((void *)&rah->priv_ah.dmac_l32,
(void *)&dmac[ROCE_RAH_DMAC_L32_START], sizeof(rah->priv_ah.dmac_l32));
return 0;
}
/*
 * roce3_create_ah - create an address handle.
 * @ibah: core AH embedded in the driver AH.
 * @init_attr: creation attributes; only ah_attr is consumed.
 * @udata: not used.
 *
 * Return: -EPERM when the HCA is not present, -EINVAL for a RoCE AH
 * without a GRH, otherwise the result of create_ib_ah().
 */
int roce3_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata)
{
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
	struct roce3_device *rdev = to_roce3_dev(ibah->device);
	struct roce3_ah *rah = to_roce3_ah(ibah);
	enum rdma_ah_attr_type ah_type = ah_attr->type;

	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	/* A RoCE address handle must carry a GRH. */
	if (ah_type == RDMA_AH_ATTR_TYPE_ROCE) {
		if (((u32)rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) == 0)
			return -EINVAL;
	}

	return create_ib_ah(rdev, ibah->pd, rah, ah_attr);
}
int roce3_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct roce3_ah *ah = NULL;
struct roce3_priv_ah priv_ah;
if ((ibah == NULL) || (ah_attr == NULL)) {
pr_err("[ROCE] %s: Ibah or ah_attr is null\n", __func__);
return -EINVAL;
}
ah = to_roce3_ah(ibah);
memset(ah_attr, 0, sizeof(*ah_attr));
priv_ah.dw1.value = be32_to_cpu(ah->priv_ah.dw1.value);
priv_ah.dw2.value = be32_to_cpu(ah->priv_ah.dw2.value);
priv_ah.dw7.value = be32_to_cpu(ah->priv_ah.dw7.value);
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->sl = priv_ah.dw7.bs.vlan_pri;
ah_attr->port_num = priv_ah.dw1.bs.port;
ah_attr->grh.traffic_class = priv_ah.dw1.bs.tclass;
ah_attr->grh.hop_limit = priv_ah.dw1.bs.hoplimit;
ah_attr->grh.sgid_index = priv_ah.dw1.bs.sgid_index;
ah_attr->grh.flow_label = priv_ah.dw2.bs.flow_label;
memcpy((void *)ah_attr->grh.dgid.raw, (void *)ah->priv_ah.dgid,
sizeof(ah->priv_ah.dgid));
return 0;
}
/*
 * roce3_destroy_ah - destroy an address handle.
 * Does no work in this driver: neither ibah nor flags is used and the
 * function always returns 0.
 */
int roce3_destroy_ah(struct ib_ah *ibah, u32 flags)
{
return 0;
}
/*
 * roce3_port_immutable - report the immutable properties of a port.
 * @ibdev: IB device.
 * @port_num: port to query.
 * @immutable: output.
 *
 * The core capability flags are set before the port is queried, so they
 * are written even when ib_query_port() fails.
 *
 * Return: 0 on success, -EPERM when the HCA is not present, or the error
 * from ib_query_port().
 */
int roce3_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable)
{
	struct roce3_device *rdev = to_roce3_dev(ibdev);
	struct ib_port_attr port_attr;
	int ret;

	if (!roce3_hca_is_present(rdev)) {
		dev_err(rdev->hwdev_hdl,
			"[ROCE] %s: HCA not present(return fail), func_id(%u)\n",
			__func__, rdev->glb_func_id);
		return -EPERM;
	}

	/* Only RoCE v2 (UDP encapsulation) is advertised. */
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	ret = ib_query_port(ibdev, port_num, &port_attr);
	if (ret != 0) {
		pr_err("[ROCE] %s: query ib port failed\n", __func__);
		return ret;
	}

	immutable->gid_tbl_len = port_attr.gid_tbl_len;
	immutable->pkey_tbl_len = port_attr.pkey_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}
/*
 * roce3_get_dcb_cfg_cos - pick the CoS for a connection from the DCB state.
 * @rdev: device whose hwdev is queried.
 * @inbuf: supplies port_num, sgid_index, sl and traffic_class.
 * @cos: output CoS.
 *
 * @cos starts as the DCB default CoS. With PCP trust and a GID whose tag
 * is ROCE_GID_VLAN_INVALID the default is kept. Otherwise the priority is
 * the sl (PCP trust) or traffic_class >> ROCE3_DSCP_IDX (any other trust
 * mode) and is mapped through hinic3_get_cos_by_pri().
 *
 * Return: 0 on success, -EINVAL when any of the lookups fails.
 */
int roce3_get_dcb_cfg_cos(struct roce3_device *rdev, struct roce3_get_cos_inbuf *inbuf, u8 *cos)
{
	struct hinic3_dcb_state dcb = { 0 };
	struct rdma_gid_entry gid;
	u8 pri;
	int ret;

	ret = roce3_rdma_get_gid(rdev->hwdev, inbuf->port_num, inbuf->sgid_index, &gid);
	if (ret != 0) {
		pr_err("[ROCE, ERR] %s: Failed to init gid info\n", __func__);
		return (-EINVAL);
	}

	ret = hinic3_get_dcb_state(rdev->hwdev, &dcb);
	if (ret != 0) {
		pr_err("[ROCE] %s: hinic3_get_dcb_state failed.ret: %d.\n", __func__, ret);
		return (-EINVAL);
	}

	*cos = dcb.default_cos;

	/* The tag bitfield is read from the converted value below. */
	gid.dw6_h.value = cpu_to_le16(gid.dw6_h.value);

	if (dcb.trust == ROCE3_DCB_PCP) {
		/* PCP configured but no VLAN: keep the default CoS. */
		if (gid.dw6_h.bs.tag == ROCE_GID_VLAN_INVALID)
			return 0;
		pri = inbuf->sl;
	} else {
		pri = (inbuf->traffic_class >> ROCE3_DSCP_IDX);
	}

	ret = hinic3_get_cos_by_pri(rdev->hwdev, pri, cos);
	if (ret != 0) {
		pr_err("[ROCE] %s: get_cos_by_pri failed.ret: %d, pri:%u, dcb_on:%u, trust:%u.\n",
		       __func__, ret, pri, dcb.dcb_on, dcb.trust);
		return (-EINVAL);
	}

	return 0;
}