// SPDX-License-Identifier: GPL-2.0 // Copyright(c) 2024 Huawei Technologies Co., Ltd #include #include #include #include #include #include "hinic3_crm.h" #include "hinic3_srv_nic.h" #include "roce.h" #include "roce_compat.h" #include "roce_user.h" #include "roce_pd.h" #include "roce_qp.h" #include "roce_cmd.h" #include "roce_netdev.h" #include "roce_main_extension.h" #include "roce_pub_cmd.h" #include "roce_mix.h" #ifdef ROCE_BONDING_EN #include "roce_bond.h" #endif struct net_device *roce3_ib_get_netdev(struct ib_device *ibdev, u32 port_num) { struct roce3_device *rdev = NULL; struct net_device *netdev = NULL; if (ibdev == NULL) { pr_err("[ROCE] %s: Ibdev is null\n", __func__); return NULL; } rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { pr_err("[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return NULL; } #ifdef ROCE_BONDING_EN netdev = roce3_bond_get_netdev(rdev); if (netdev != NULL) return netdev; #endif netdev = rdev->ndev; dev_hold(netdev); return netdev; } static void roce3_parse_fw_version(struct roce3_device *rdev, u64 *fw_ver) { int ret; int i = 0; struct hinic3_fw_version fw_version; char *fw_str = (char *)fw_version.microcode_ver; char *fw_temp = NULL; u64 fw_verion[ROCE_FW_VERSION_LEN] = {0}; ret = hinic3_get_fw_version(rdev->hwdev, &fw_version, HINIC3_CHANNEL_ROCE); if (ret != 0) { pr_warn("[ROCE] %s: get fw version failed\n", __func__); *fw_ver = ROCE_FW_VER; return; } pr_info("[ROCE] %s: fw ver:%s - %s - %s\n", __func__, fw_version.boot_ver, fw_version.mgmt_ver, fw_version.microcode_ver); while (((fw_temp = strsep(&fw_str, ".")) != NULL) && (i < ROCE_FW_VERSION_LEN)) { ret = kstrtou64(fw_temp, 10, &fw_verion[i]); if (ret != 0) { pr_warn("[ROCE] %s: parse fw version failed\n", __func__); *fw_ver = ROCE_FW_VER; return; } i++; } /* * 0 is fw_version array idx, 32 is offset * 1 is fw_version array idx, 16 is offset * 2 is fw_version array idx, 8 is offset * 3 is fw_version array idx */ *fw_ver = (((fw_verion[0] & 0xffffffff) << 32) | ((fw_verion[1] & 0xffff) << 16) | ((fw_verion[2] & 0xff) << 8) | (fw_verion[3] & 0xff)); } static void roce3_set_local_cap_flag(const struct rdma_service_cap *rdma_cap, struct ib_device_attr *props) { if (((rdma_cap->flags & RDMA_BMME_FLAG_LOCAL_INV) != 0) && ((rdma_cap->flags & RDMA_BMME_FLAG_REMOTE_INV) != 0) && ((rdma_cap->flags & RDMA_BMME_FLAG_FAST_REG_WR) != 0)) { props->device_cap_flags = props->device_cap_flags | IB_DEVICE_MEM_MGT_EXTENSIONS; } } static void roce3_set_bmme_cap_flag(const struct rdma_service_cap *rdma_cap, struct ib_device_attr *props) { if ((rdma_cap->flags & RDMA_BMME_FLAG_TYPE_2_WIN) != 0) { if ((rdma_cap->flags & RDMA_BMME_FLAG_WIN_TYPE_2B) != 0) props->device_cap_flags = props->device_cap_flags | IB_DEVICE_MEM_WINDOW_TYPE_2B; else props->device_cap_flags = props->device_cap_flags | IB_DEVICE_MEM_WINDOW_TYPE_2A; } } static void roce3_query_device_props_set(struct roce3_device *rdev, struct rdma_service_cap *rdma_cap, struct ib_device_attr *props) { props->vendor_id = rdev->pdev->vendor; props->vendor_part_id = rdev->pdev->device; roce3_parse_fw_version(rdev, &props->fw_ver); props->hw_ver = ROCE_HW_VER; /* sys_image_guid equal GID */ props->sys_image_guid = rdev->ib_dev.node_guid; props->max_mr_size = ~0ULL; props->page_size_cap = rdma_cap->page_size_cap; props->max_qp = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_qps - rdma_cap->reserved_qps); props->max_qp_wr = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_wqes; /* * 4.19 ofed will return the smaller of sq/rq sge num to user space. * 4.17 We use max_sge to only represent max sq sge num, max_rq_sge is a fixed macro of 16. */ props->max_send_sge = rdma_cap->max_sq_sg; props->max_recv_sge = rdma_cap->dev_rdma_cap.roce_own_cap.max_rq_sg; props->max_cq = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_cqs - rdma_cap->reserved_cqs); props->max_cqe = (int)rdma_cap->max_cqes; if ((rdev->board_info.port_num == ROCE_PORT_NUM_2) && (rdev->board_info.port_speed == ROCE_25G_PORT_SPEED)) { // 2 smf for 64B cache props->max_mr = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_mpts - rdma_cap->reserved_mrws) / MEND_CAP_DEVIDE; props->max_srq = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_srqs - rdma_cap->dev_rdma_cap.roce_own_cap.reserved_srqs) / MEND_CAP_DEVIDE; } else { props->max_mr = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_mpts - rdma_cap->reserved_mrws); props->max_srq = (int)(rdma_cap->dev_rdma_cap.roce_own_cap.max_srqs - rdma_cap->dev_rdma_cap.roce_own_cap.reserved_srqs); } props->max_mw = props->max_mr; props->max_pd = (int)(rdma_cap->num_pds - rdma_cap->reserved_pds); props->max_qp_rd_atom = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_qp_dest_rdma; props->max_qp_init_rd_atom = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_wr = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_srq_wqes; props->max_srq_sge = (int)rdma_cap->dev_rdma_cap.roce_own_cap.max_srq_sge; props->max_fast_reg_page_list_len = rdma_cap->max_frpl_len; props->local_ca_ack_delay = (u8)rdma_cap->local_ca_ack_delay; props->atomic_cap = ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_ATOMIC) != 0) ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->masked_atomic_cap = props->atomic_cap; props->max_pkeys = (u16)rdma_cap->max_pkeys; props->max_ah = INT_MAX; } /* **************************************************************************** Prototype : roce3_query_device Description : query device attribute Input : struct ib_device *ibdev struct ib_device_attr *props struct ib_udata *uhw Output : struct ib_device_attr *props 1.Date : 2015/5/8 Modification : Created function **************************************************************************** */ int roce3_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { struct roce3_device *rdev = NULL; struct rdma_service_cap *rdma_cap = NULL; if ((ibdev == NULL) || (props == NULL)) { pr_err("[ROCE] %s: Ibdev or props is null\n", __func__); return -EINVAL; } rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } memset(props, 0, sizeof(*props)); rdma_cap = &rdev->rdma_cap; props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_RC_RNR_NAK_GEN; /* APM */ if ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_APM) != 0) props->device_cap_flags = props->device_cap_flags | IB_DEVICE_AUTO_PATH_MIG; /* rsvd_lKey */ if ((rdma_cap->flags & RDMA_BMME_FLAG_RESERVED_LKEY) != 0) props->device_cap_flags = props->device_cap_flags | IB_DEVICE_LOCAL_DMA_LKEY; roce3_set_local_cap_flag(rdma_cap, props); #ifndef ROCE_COMPUTE /* support XRC */ if ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_XRC) != 0) props->device_cap_flags = props->device_cap_flags | IB_DEVICE_XRC; #endif /* support MW */ if ((rdma_cap->flags & RDMA_DEV_CAP_FLAG_MEM_WINDOW) != 0) props->device_cap_flags = props->device_cap_flags | IB_DEVICE_MEM_WINDOW; roce3_set_bmme_cap_flag(rdma_cap, props); roce3_query_device_props_set(rdev, rdma_cap, props); return 0; } static void eth_link_get_speed(struct ib_port_attr *props, enum mag_cmd_port_speed speed) { switch (speed) { /* 10G <==> 1X x 10G */ case PORT_SPEED_10GB: props->active_width = IB_WIDTH_1X; props->active_speed = IB_SPEED_QDR; break; /* 25G <==> 1X x 25G */ case PORT_SPEED_25GB: props->active_width = IB_WIDTH_1X; props->active_speed = IB_SPEED_EDR; break; /* 40G <==> 4X x 10G */ case PORT_SPEED_40GB: props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; break; /* 100G <==> 4X x 25G */ case PORT_SPEED_100GB: props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_EDR; break; default: props->active_width = 0; props->active_speed = 0; break; } } static void roce3_set_ib_port_attr(struct ib_port_attr *props, struct roce3_device *rdev) { props->port_cap_flags = IB_PORT_CM_SUP; props->gid_tbl_len = (int)rdev->rdma_cap.max_gid_per_port; props->max_msg_sz = rdev->rdma_cap.dev_rdma_cap.roce_own_cap.max_msg_sz; props->pkey_tbl_len = (u16)rdev->rdma_cap.max_pkeys; props->max_mtu = IB_MTU_4096; props->state = IB_PORT_DOWN; props->phys_state = ROCE_PORT_PHYS_STATE_DISABLED; props->active_mtu = IB_MTU_256; } #ifdef OFED_MLNX_5_8 static void eth_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) #else static void eth_link_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) #endif { struct roce3_device *rdev = NULL; struct net_device *netdev = NULL; #ifdef ROCE_BONDING_EN struct net_device *upper = NULL; #endif enum ib_mtu mtu; enum mag_cmd_port_speed speed = PORT_SPEED_10GB; int ret = 0; rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u), dev_name(%s).\n", __func__, rdev->glb_func_id, ibdev->name); return; } roce3_set_ib_port_attr(props, rdev); ret = hinic3_get_speed(rdev->hwdev, &speed, HINIC3_CHANNEL_ROCE); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to get speed, func_id(%d)\n", __func__, rdev->glb_func_id); props->active_width = 0; props->active_speed = 0; return; } eth_link_get_speed(props, speed); netdev = roce3_ib_get_netdev(ibdev, ROCE_DEFAULT_PORT_NUM); if (netdev == NULL) return; #ifdef ROCE_BONDING_EN if (roce3_bond_is_active(rdev)) { rcu_read_lock(); upper = netdev_master_upper_dev_get_rcu(netdev); if (upper != NULL) { dev_put(netdev); netdev = upper; dev_hold(netdev); } rcu_read_unlock(); } #endif if (netif_running(netdev) && netif_carrier_ok(netdev)) { props->state = IB_PORT_ACTIVE; props->phys_state = ROCE_PORT_PHYS_STATE_LINKUP; } mtu = (enum ib_mtu)iboe_get_mtu((int)netdev->mtu); dev_put(netdev); props->active_mtu = ROCE_MIN(props->max_mtu, mtu); } /* **************************************************************************** Prototype : roce3_query_port Description : query port attribute Input : struct ib_device *ibdev u8 port struct ib_port_attr *props Output : struct ib_port_attr *props 1.Date : 2015/5/8 Modification : Created function **************************************************************************** */ int roce3_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { if ((ibdev == NULL) || (props == NULL)) { pr_err("[ROCE] %s: Ibdev or props is null\n", __func__); return -EINVAL; } memset(props, 0, sizeof(*props)); eth_link_query_port(ibdev, port, props); return 0; } /* **************************************************************************** Prototype : roce3_query_gid Description : query gid Input : struct ib_device *ibdev u8 port int index union ib_gid *gid Output : union ib_gid *gid 1.Date : 2015/5/8 Modification : Created function 2.Date : 2015/6/8 Modification : Modify function **************************************************************************** */ int roce3_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid) { int ret = 0; struct roce3_device *rdev = NULL; struct rdma_gid_entry gid_entry; if ((ibdev == NULL) || (gid == NULL)) { pr_err("[ROCE] %s: Ibdev or gid is null\n", __func__); return -EINVAL; } rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } ret = roce3_rdma_get_gid(rdev->hwdev, (u32)port, (u32)index, &gid_entry); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to get gid, func_id(%d)\n", __func__, rdev->glb_func_id); return ret; } memcpy((void *)gid->raw, (void *)gid_entry.raw, sizeof(*gid)); // 按照OFED的gid生成方式转换GID, 仅IPv4场景需要转换 if (gid_entry.dw6_h.bs.gid_type == ROCE_IPv4_ROCEv2_GID) { // 未add的gid直接返回,不需要转换 if ((gid->global.subnet_prefix == 0) && (gid->global.interface_id == 0)) return 0; ipv6_addr_set_v4mapped(*((u32 *)(void *)gid + ROCE_GID_IP_IDX), (struct in6_addr *)gid); } return 0; } /* **************************************************************************** Prototype : roce3_query_pkey Description : query pkey Input : struct ib_device *ibdev u8 port u16 index u16 *pkey Output : u16 *pkey 1.Date : 2015/5/8 Modification : Created function **************************************************************************** */ int roce3_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { struct roce3_device *rdev = NULL; if ((ibdev == NULL) || (pkey == NULL)) { pr_err("[ROCE] %s: Ibdev or pkey is null\n", __func__); return -EINVAL; } rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } if (pkey == NULL) { pr_err("[ROCE] %s: Pkey is null\n", __func__); return -EINVAL; } *pkey = 0xffff; return 0; } /* **************************************************************************** Prototype : roce3_modify_device Description : modify device attribute Input : struct ib_device *ibdev int mask struct ib_device_modify *props Output : None 1.Date : 2015/5/8 Modification : Created function **************************************************************************** */ int roce3_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { unsigned long flags = 0; struct roce3_device *rdev = NULL; if ((ibdev == NULL) || (props == NULL)) { pr_err("[ROCE] %s: Ibdev or props is null\n", __func__); return -EINVAL; } if (((unsigned int)mask & ~IB_DEVICE_MODIFY_NODE_DESC) != 0) { pr_err("[ROCE] %s: Not supported to modify node description\n", __func__); return -EOPNOTSUPP; } if ((((u32)mask) & IB_DEVICE_MODIFY_NODE_DESC) == 0) { pr_info("[ROCE] %s: No need to modify node description\n", __func__); return 0; } rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } spin_lock_irqsave(&rdev->node_desc_lock, flags); memcpy((void *)ibdev->node_desc, (void *)props->node_desc, IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&rdev->node_desc_lock, flags); return 0; } /* **************************************************************************** Prototype : roce3_modify_port Description : modify port attribute Input : struct ib_device *ibdev u8 port int mask struct ib_port_modify *props Output : None 1.Date : 2015/5/8 Modification : Created function **************************************************************************** */ int roce3_modify_port(struct ib_device *ibdev, u32 port, int mask, struct ib_port_modify *props) { int ret = 0; struct ib_port_attr attr; struct roce3_device *rdev = NULL; if (ibdev == NULL) { pr_err("[ROCE] %s: Ibdev is null\n", __func__); return -EINVAL; } rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } memset(&attr, 0, sizeof(struct ib_port_attr)); mutex_lock(&rdev->cap_mask_mutex); ret = roce3_query_port(ibdev, port, &attr); if (ret != 0) dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to query port, func_id(%d)\n", __func__, rdev->glb_func_id); mutex_unlock(&rdev->cap_mask_mutex); return ret; } static void roce3_alloc_ucontext_set(struct roce3_device *rdev, struct roce3_alloc_ucontext_resp *resp) { struct rdma_service_cap *rdma_cap = NULL; rdma_cap = &rdev->rdma_cap; resp->num_qps = rdma_cap->dev_rdma_cap.roce_own_cap.max_qps; resp->num_xsrqs = rdma_cap->dev_rdma_cap.roce_own_cap.max_srqs; resp->cqe_size = rdma_cap->cqe_size; resp->wqebb_size = rdma_cap->wqebb_size; resp->dwqe_size = rdma_cap->direct_wqe_size; resp->max_msg_size = rdma_cap->dev_rdma_cap.roce_own_cap.max_msg_sz; resp->max_comp_vector = rdma_cap->num_comp_vectors; resp->max_inline_size = rdma_cap->dev_rdma_cap.roce_own_cap.max_sq_inline_data_sz; resp->storage_aa_en = roce3_is_roceaa(rdev->cfg_info.scence_id); resp->phy_port = rdev->hw_info.phy_port; resp->srq_container_en = rdev->cfg_info.srq_container_en; resp->srq_container_mode = rdev->cfg_info.srq_container_mode; resp->xrc_srq_container_mode = rdev->cfg_info.xrc_srq_container_mode; resp->warn_th = rdev->cfg_info.warn_th; roce3_resp_set_ext(rdev, resp); } static int roce3_alloc_ucontext_pre_check(struct ib_device *ibdev, const struct ib_udata *udata) { struct roce3_device *rdev = NULL; if ((ibdev == NULL) || (udata == NULL)) { pr_err("[ROCE] %s: Ibdev or udata is null\n", __func__); return -EINVAL; } rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } if (!rdev->ib_active) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Device is abnormal, func_id(%d)\n", __func__, rdev->glb_func_id); return -EAGAIN; } return 0; } static int roce3_alloc_ucontext_return(struct roce3_device *rdev, struct ib_udata *udata, struct roce3_ucontext *context, struct roce3_alloc_ucontext_resp *resp) { int ret; resp->db_offset = context->db_dma_addr & ((1 << PAGE_SHIFT) - 1); resp->dwqe_offset = context->dwqe_dma_addr & ((1 << PAGE_SHIFT) - 1); if (context->dwqe_dma_addr == 0) resp->dwqe_size = 0; roce3_ucontext_set_ext(rdev, context); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); /* Copy data to user space */ ret = ib_copy_to_udata_ext(udata, resp); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to copy data to user space, func_id(%d)\n", __func__, rdev->glb_func_id); return ret; } return 0; } int roce3_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) { int ret; struct roce3_ucontext *context = rdma_udata_to_drv_context( udata, struct roce3_ucontext, ibucontext); struct roce3_device *rdev = to_roce3_dev(ibucontext->device); struct roce3_alloc_ucontext_resp *resp = NULL; ret = roce3_alloc_ucontext_pre_check(ibucontext->device, udata); if (ret != 0) return ret; resp = roce3_resp_alloc_ext(); if (resp == NULL) { ret = (-ENOMEM); dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to alloc ucontext, func_id(%u)\n", __func__, rdev->glb_func_id); goto err; } roce3_alloc_ucontext_set(rdev, resp); /* Alloc user space context Doorbell and DWQE */ ret = hinic3_alloc_db_phy_addr(rdev->hwdev, &context->db_dma_addr, &context->dwqe_dma_addr); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to alloc DB pa, ret(%d), func_id(%u)\n", __func__, ret, rdev->glb_func_id); goto err_db; } /* Copy data to user space */ ret = roce3_alloc_ucontext_return(rdev, udata, context, resp); if (ret != 0) goto err_return; kfree(resp); return 0; err_return: hinic3_free_db_phy_addr(rdev->hwdev, context->db_dma_addr, context->dwqe_dma_addr); err_db: kfree(resp); err: return ret; } void roce3_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct roce3_ucontext *context = NULL; struct roce3_device *rdev = NULL; if (ibcontext == NULL) { pr_err("[ROCE] %s: Ibcontext is null\n", __func__); return; } context = to_roce3_ucontext(ibcontext); rdev = to_roce3_dev(ibcontext->device); hinic3_free_db_phy_addr(rdev->hwdev, context->db_dma_addr, context->dwqe_dma_addr); } /* **************************************************************************** Prototype : roce3_mmap Description : memory map Input : struct ib_ucontext *ibcontext struct vm_area_struct *vma Output : None 1.Date : 2015/5/8 Modification : Created function **************************************************************************** */ int roce3_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct roce3_device *rdev = NULL; struct roce3_ucontext *ucontext = NULL; unsigned long db_pfn = 0; unsigned long dwqe_pfn = 0; int res = 0; if ((ibcontext == NULL) || (vma == NULL)) { pr_err("[ROCE] %s: Ibcontext or vma is null\n", __func__); return -EINVAL; } rdev = to_roce3_dev(ibcontext->device); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } ucontext = to_roce3_ucontext(ibcontext); db_pfn = ucontext->db_dma_addr >> PAGE_SHIFT; dwqe_pfn = ucontext->dwqe_dma_addr >> PAGE_SHIFT; if ((vma->vm_end - vma->vm_start) != PAGE_SIZE) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: (Vm_end - vm_start) is not equal to PAGE_SIZE, func_id(%d)\n", __func__, rdev->glb_func_id); return -EINVAL; } /* map hw DB to physical page from user */ if (vma->vm_pgoff == USR_MMAP_DB_OFFSET) { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); /* construct vm_start~vm_start+PAGE_SIZE page table * db_pfn is page number * vm_page_prot means attr */ if (io_remap_pfn_range(vma, vma->vm_start, db_pfn, PAGE_SIZE, vma->vm_page_prot) != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to do db io remap, func_id(%d)\n", __func__, rdev->glb_func_id); return -EAGAIN; } return 0; } // DWQE mmap if ((vma->vm_pgoff == USR_MMAP_DWQE_OFFSET) && (rdev->rdma_cap.direct_wqe_size != 0)) { #ifdef __aarch64__ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); #else vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); #endif if (io_remap_pfn_range(vma, vma->vm_start, dwqe_pfn, PAGE_SIZE, vma->vm_page_prot) != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] %s: Failed to do dwqe io remap, func_id(%d)\n", __func__, rdev->glb_func_id); return -EAGAIN; } return 0; } res = roce3_mmap_ext(rdev, ucontext, vma); return res; } enum rdma_link_layer roce3_port_link_layer(struct ib_device *ibdev, u32 port_num) { struct roce3_device *rdev = NULL; if (ibdev == NULL) return IB_LINK_LAYER_UNSPECIFIED; rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return IB_LINK_LAYER_UNSPECIFIED; } if (port_num != ROCE_DEFAULT_PORT_NUM) return IB_LINK_LAYER_UNSPECIFIED; return IB_LINK_LAYER_ETHERNET; } static void roce3_resolve_cb(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context) { ((struct roce3_resolve_cb_context *)context)->status = status; complete(&((struct roce3_resolve_cb_context *)context)->comp); } static int roce3_rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, u8 *dmac, const struct net_device *ndev, int *hoplimit, struct roce3_device *rdev) { struct rdma_dev_addr dev_addr; struct roce3_resolve_cb_context ctx; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; int ret; rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid); rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); if (ndev) { dev_addr.bound_dev_if = ndev->ifindex; dev_addr.net = dev_net(ndev); } else { dev_addr.net = &init_net; } init_completion(&ctx.comp); ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr, &dev_addr, RESOLVE_IP_TIME_OUT, roce3_resolve_cb, false, &ctx); if (ret != 0) { pr_err("[ROCE] %s: rdma_resolve_ip failed. Igonore the err.\n", __func__); roce3_resolve_cb(0, &sgid_addr._sockaddr, &dev_addr, &ctx); } wait_for_completion(&ctx.comp); memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); if (hoplimit) *hoplimit = dev_addr.hoplimit; return 0; } static int roce3_ah_valid_check(struct ib_global_route *grh, u16 *vlan_id, u8 *dmac) { u8 unicast_gid0[ROCE_GID_LEN] = { 0 }; u8 unicast_gid1[ROCE_GID_LEN] = { 0 }; /* check gid(unicast gid can not be 0 or 1) */ unicast_gid0[ROCE_GID_HIGHEST_BYTE] = 0; unicast_gid1[ROCE_GID_HIGHEST_BYTE] = 1; if ((ROCE_MEMCMP(grh->dgid.raw, unicast_gid0, sizeof(union ib_gid)) == 0) || (ROCE_MEMCMP(grh->dgid.raw, unicast_gid1, sizeof(union ib_gid)) == 0)) { pr_err("[ROCE] %s: Invalid unicast dgid\n", __func__); return (-EINVAL); } if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) != 0) { rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, dmac); *vlan_id = ROCE_DEFAULT_VLAN_ID; } return 0; } static int roce3_fill_gid_attr(struct roce3_device *rdev, struct rdma_ah_attr *ah_attr, union ib_gid *sgid, const struct ib_gid_attr **sgid_attr) { int ret = 0; ret = rdma_query_gid(&rdev->ib_dev, ah_attr->port_num, ah_attr->grh.sgid_index, sgid); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] : Failed to query gid func_id(%u),port_num(%d),gid_index(%d),ret(%d)\n", rdev->glb_func_id, ah_attr->port_num, ah_attr->grh.sgid_index, ret); return ret; } *sgid_attr = rdma_get_gid_attr(&rdev->ib_dev, ah_attr->port_num, ah_attr->grh.sgid_index); if (IS_ERR_OR_NULL(*sgid_attr)) { ret = (int)PTR_ERR(*sgid_attr); dev_err(rdev->hwdev_hdl, "[ROCE] : Failed to get sgid_attr, func_id(%u), ret(%d).\n", rdev->glb_func_id, ret); return ret; } return ret; } static void roce3_release_gid_ref_cnt(const struct ib_gid_attr *sgid_attr) { rdma_put_gid_attr(sgid_attr); } static struct net_device *roce3_fill_netdev(struct roce3_device *rdev, union ib_gid *sgid) { struct net_device *netdev = NULL; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } socket_addr; rdma_gid2ip((struct sockaddr *)&socket_addr, sgid); /* find netdev,rdev->ndevis not valid in vlan scenario */ netdev = ip_dev_find(&init_net, ((const struct sockaddr_in *)&socket_addr._sockaddr)->sin_addr.s_addr); if (netdev) dev_put(netdev); return netdev; } int roce3_resolve_grh(struct roce3_device *rdev, struct rdma_ah_attr *ah_attr, u16 *vlan_id, struct ib_udata *udata) { int ret = 0; u8 zero_mac[ETH_ALEN] = { 0 }; u8 *dmac = NULL; union ib_gid sgid; const struct ib_gid_attr *sgid_attr = NULL; struct net_device *netdev = NULL; if ((rdev == NULL) || (ah_attr == NULL) || (vlan_id == NULL)) { pr_err("[ROCE, ERR] %s: Input pointer is NULL, rdev(%p), ah_attr(%p), vlan_id(%p).\n", __func__, rdev, ah_attr, vlan_id); return (-EINVAL); } dmac = ah_attr->roce.dmac; ret = roce3_ah_valid_check(&ah_attr->grh, vlan_id, dmac); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] : Failed to check grh input, func_id(%u), ret(%d).\n", rdev->glb_func_id, ret); return ret; } if (ROCE_MEMCMP(dmac, zero_mac, ETH_ALEN) != 0) return 0; ret = roce3_fill_gid_attr(rdev, ah_attr, &sgid, &sgid_attr); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] : Failed to fill gid attr, func_id(%u), ret(%d)\n", rdev->glb_func_id, ret); return ret; } netdev = roce3_fill_netdev(rdev, &sgid); /* reparse dmac avoiding invalid damc from OFED */ ret = roce3_rdma_addr_find_l2_eth_by_grh(&sgid, &ah_attr->grh.dgid, dmac, netdev, NULL, rdev); if (ret != 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] : Failed to find dmac by grh, func_id(%u)\n", rdev->glb_func_id); goto resolve_grh_end; } if (ROCE_MEMCMP(dmac, zero_mac, ETH_ALEN) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE, ERR] : Failed to find valid dmac, func_id(%u)\n", rdev->glb_func_id); ret = (-EINVAL); goto resolve_grh_end; } *vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); resolve_grh_end: roce3_release_gid_ref_cnt(sgid_attr); return ret; } static int ah_get_vlan_id(struct roce3_device *rdev, struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 *vlan_id) { struct net_device *ndev; rcu_read_lock(); ndev = rcu_dereference(ah_attr->grh.sgid_attr->ndev); if (ndev == NULL) { rcu_read_unlock(); dev_err(rdev->hwdev_hdl, "[ROCE, ERR] : Net device is NULL, func_id(%u)\n", rdev->glb_func_id); return -EINVAL; } *vlan_id = rdma_vlan_dev_vlan_id(ndev); rcu_read_unlock(); return 0; } static int create_ib_ah(struct roce3_device *rdev, struct ib_pd *pd, struct roce3_ah *rah, struct rdma_ah_attr *ah_attr) { int ret; u8 *dmac = ah_attr->roce.dmac; u32 vlan_id = 0; ret = ah_get_vlan_id(rdev, pd, ah_attr, &vlan_id); if (ret != 0) { pr_err("[ROCE, ERR] %s: Failed to get vlan_id (ret:%d)\n", __func__, ret); return -EFAULT; } if (((u32)rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) != 0) { memcpy((void *)rah->priv_ah.dgid, (void *)ah_attr->grh.dgid.raw, sizeof(rah->priv_ah.dgid)); rah->priv_ah.dw2.bs.flow_label = ah_attr->grh.flow_label & 0xfffff; rah->priv_ah.dw1.bs.sgid_index = ah_attr->grh.sgid_index & 0x7f; rah->priv_ah.dw1.bs.hoplimit = ah_attr->grh.hop_limit; rah->priv_ah.dw1.bs.tclass = (u8)(ah_attr->grh.traffic_class | 0x2); } rah->priv_ah.dw0.bs.pd = to_roce3_pd(pd)->pdn & 0x3ffff; rah->priv_ah.dw0.bs.wqe_cos = roce3_get_db_cos_from_vlan_pri(rdev, ah_attr->sl); rah->priv_ah.dw0.value = cpu_to_be32(rah->priv_ah.dw0.value); rah->priv_ah.dw1.bs.port = ah_attr->port_num & 0xf; rah->priv_ah.dw2.bs.smac_index = rdev->glb_func_id; /* set global Function ID */ rah->priv_ah.dw2.value = cpu_to_be32(rah->priv_ah.dw2.value); rah->priv_ah.dw1.bs.resv = 0; rah->priv_ah.dw7.bs.vlan_id = vlan_id & 0xfff; rah->priv_ah.dw7.bs.vlan_pri = ah_attr->sl & 0x7; rah->priv_ah.dw1.value = cpu_to_be32(rah->priv_ah.dw1.value); rah->priv_ah.dw7.bs.dmac_h16 = (dmac[0] << ROCE_RAH_DMAC_H16_SHIFT) | dmac[1]; rah->priv_ah.dw7.value = cpu_to_be32(rah->priv_ah.dw7.value); memcpy((void *)&rah->priv_ah.dmac_l32, (void *)&dmac[ROCE_RAH_DMAC_L32_START], sizeof(rah->priv_ah.dmac_l32)); return 0; } int roce3_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { struct roce3_ah *rah = to_roce3_ah(ibah); struct roce3_device *rdev = to_roce3_dev(ibah->device); struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum rdma_ah_attr_type ah_type = ah_attr->type; if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } if ((ah_type == RDMA_AH_ATTR_TYPE_ROCE) && (((u32)rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) == 0)) return -EINVAL; return create_ib_ah(rdev, ibah->pd, rah, ah_attr); } int roce3_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) { struct roce3_ah *ah = NULL; struct roce3_priv_ah priv_ah; if ((ibah == NULL) || (ah_attr == NULL)) { pr_err("[ROCE] %s: Ibah or ah_attr is null\n", __func__); return -EINVAL; } ah = to_roce3_ah(ibah); memset(ah_attr, 0, sizeof(*ah_attr)); priv_ah.dw1.value = be32_to_cpu(ah->priv_ah.dw1.value); priv_ah.dw2.value = be32_to_cpu(ah->priv_ah.dw2.value); priv_ah.dw7.value = be32_to_cpu(ah->priv_ah.dw7.value); ah_attr->ah_flags = IB_AH_GRH; ah_attr->sl = priv_ah.dw7.bs.vlan_pri; ah_attr->port_num = priv_ah.dw1.bs.port; ah_attr->grh.traffic_class = priv_ah.dw1.bs.tclass; ah_attr->grh.hop_limit = priv_ah.dw1.bs.hoplimit; ah_attr->grh.sgid_index = priv_ah.dw1.bs.sgid_index; ah_attr->grh.flow_label = priv_ah.dw2.bs.flow_label; memcpy((void *)ah_attr->grh.dgid.raw, (void *)ah->priv_ah.dgid, sizeof(ah->priv_ah.dgid)); return 0; } int roce3_destroy_ah(struct ib_ah *ibah, u32 flags) { return 0; } int roce3_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; struct roce3_device *rdev = to_roce3_dev(ibdev); if (roce3_hca_is_present(rdev) == 0) { dev_err(rdev->hwdev_hdl, "[ROCE] %s: HCA not present(return fail), func_id(%u)\n", __func__, rdev->glb_func_id); return -EPERM; } immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; // only rocev2 err = ib_query_port(ibdev, port_num, &attr); if (err != 0) { pr_err("[ROCE] %s: query ib port failed\n", __func__); return err; } immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } int roce3_get_dcb_cfg_cos(struct roce3_device *rdev, struct roce3_get_cos_inbuf *inbuf, u8 *cos) { int ret; u8 pri; struct rdma_gid_entry gid; struct hinic3_dcb_state dcb = { 0 }; ret = roce3_rdma_get_gid(rdev->hwdev, inbuf->port_num, inbuf->sgid_index, &gid); if (ret != 0) { pr_err("[ROCE, ERR] %s: Failed to init gid info\n", __func__); return (-EINVAL); } ret = hinic3_get_dcb_state(rdev->hwdev, &dcb); if (ret != 0) { pr_err("[ROCE] %s: hinic3_get_dcb_state failed.ret: %d.\n", __func__, ret); return (-EINVAL); } *cos = dcb.default_cos; gid.dw6_h.value = cpu_to_le16(gid.dw6_h.value); if ((dcb.trust == ROCE3_DCB_PCP) && (gid.dw6_h.bs.tag == ROCE_GID_VLAN_INVALID)) { // pcp cfg & no vlan should use default cos return 0; } pri = (dcb.trust == ROCE3_DCB_PCP) ? inbuf->sl : (inbuf->traffic_class >> ROCE3_DSCP_IDX); ret = hinic3_get_cos_by_pri(rdev->hwdev, pri, cos); if (ret != 0) { pr_err("[ROCE] %s: get_cos_by_pri failed.ret: %d, pri:%u, dcb_on:%u, trust:%u.\n", __func__, ret, pri, dcb.dcb_on, dcb.trust); return (-EINVAL); } return 0; }