// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 - 2023, Shanghai Yunsilicon Technology Co., Ltd.
 * All rights reserved.
 */

#include
#include
#include "xsc_ib.h"
#include "global.h"
#include "user.h"
#include "common/xsc_hsi.h"
#include "common/xsc_lag.h"
#include
#include
#include

/* not supported currently */
static int wq_signature;

#define MAD_QUEUE_DEPTH	128

enum {
	XSC_IB_CACHE_LINE_SIZE = 64,
};

#define LAG_PORT_NUM_MASK_EN		0x80000000
#define LAG_PORT_NUM_MASK_EN_OFFSET	31
#define LAG_PORT_NUM_MASK		0x30000
#define LAG_PORT_NUM_OFFSET		16

#define UDP_SPORT_MASK_EN		0x40000000
#define UDP_SPORT_MASK_EN_OFFSET	30
#define UDP_SPORT_MASK			0xffff
#define UDP_SPORT_OFFSET		0

static const u32 xsc_ib_opcode[] = {
	[IB_WR_SEND]			= XSC_MSG_OPCODE_SEND,
	[IB_WR_SEND_WITH_IMM]		= XSC_MSG_OPCODE_SEND,
	[IB_WR_RDMA_WRITE]		= XSC_MSG_OPCODE_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM]	= XSC_MSG_OPCODE_RDMA_WRITE,
	[IB_WR_RDMA_READ]		= XSC_MSG_OPCODE_RDMA_READ,
	[IB_WR_LOCAL_INV]		= XSC_MSG_OPCODE_SEND,
	[IB_WR_REG_MR]			= XSC_MSG_OPCODE_SEND,
};

static int is_qp0(enum ib_qp_type qp_type)
{
	return qp_type == IB_QPT_SMI;
}

static int is_qp1(enum ib_qp_type qp_type)
{
	return qp_type == IB_QPT_GSI;
}

static int is_sqp(enum ib_qp_type qp_type)
{
	return is_qp0(qp_type) || is_qp1(qp_type);
}

static void *get_wqe(struct xsc_ib_qp *qp, int offset)
{
	return xsc_buf_offset(&qp->buf, offset);
}

static void *get_recv_wqe(struct xsc_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}

static void *get_seg_wqe(void *first, int n)
{
	return first + (n << XSC_BASE_WQE_SHIFT);
}

void *xsc_get_send_wqe(struct xsc_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

static int iboe_tos_to_sl(struct net_device *ndev, int tos)
{
	int prio;
	struct net_device *dev;

	prio = rt_tos2priority(tos);
	dev = is_vlan_dev(ndev) ?
		vlan_dev_real_dev(ndev) : ndev;

	if (dev->num_tc)
		return netdev_get_prio_tc_map(dev, prio);

#if IS_ENABLED(CONFIG_VLAN_8021Q)
	if (is_vlan_dev(ndev))
		return (vlan_dev_get_egress_qos_mask(ndev, prio) &
			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
#endif
	return 0;
}

static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg,
				       u32 msg_len, u64 remote_addr, u32 rkey)
{
	remote_seg->in_line = 0;
	WR_LE_32(remote_seg->seg_len, msg_len);
	WR_LE_32(remote_seg->mkey, rkey);
	WR_LE_64(remote_seg->va, remote_addr);
}

static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ib_sge *sg)
{
	data_seg->in_line = 0;
	WR_LE_32(data_seg->seg_len, sg->length);
	WR_LE_32(data_seg->mkey, sg->lkey);
	WR_LE_64(data_seg->va, sg->addr);
}

static int set_data_inl_seg(struct xsc_ib_qp *qp, const struct ib_send_wr *wr, void *ctrl)
{
	struct xsc_wqe_data_seg *data_seg;
	unsigned int seg_index;
	void *addr;
	int len;
	int i;

	for (i = 0, seg_index = 1; i < wr->num_sge; ++i, ++seg_index) {
		if (likely(wr->sg_list[i].length)) {
			addr = (void *)wr->sg_list[i].addr;
			len = wr->sg_list[i].length;

			if (unlikely(len > qp->max_inline_data))
				return -ENOMEM;

			data_seg = get_seg_wqe(ctrl, seg_index);
			data_seg->in_line = 1;
			data_seg->len = len;
			memcpy(data_seg->in_line_data, addr, len);
		}
	}

	return 0;
}

static __be32 send_ieth(const struct ib_send_wr *wr)
{
	switch (wr->opcode) {
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		return wr->ex.imm_data;
	default:
		return 0;
	}
}

static void xsc_ib_qp_event(struct xsc_core_qp *qp, int type)
{
	struct ib_qp *ibqp = &to_xibqp(qp)->ibqp;
	struct ib_event event;

	if (ibqp->event_handler) {
		event.device = ibqp->device;
		event.element.qp = ibqp;
		switch (type) {
		case XSC_EVENT_TYPE_WQ_CATAS_ERROR:
			event.event = IB_EVENT_QP_FATAL;
			break;
		case XSC_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			event.event = IB_EVENT_QP_REQ_ERR;
			break;
		case XSC_EVENT_TYPE_WQ_ACCESS_ERROR:
			event.event = IB_EVENT_QP_ACCESS_ERR;
			break;
		default:
			pr_warn("xsc_ib: Unexpected event type %d on QP %06x\n",
				type, qp->qpn);
			return;
		}

		ibqp->event_handler(&event, ibqp->qp_context);
	}
}

static int set_rq_size(struct xsc_ib_dev *dev, struct ib_qp_cap *cap,
		       int has_rq, struct xsc_ib_qp *qp, struct xsc_ib_create_qp *ucmd)
{
	/* Sanity check RQ size before proceeding */
	if (cap->max_recv_wr > dev->xdev->caps.max_wqes)
		return -EINVAL;

	if (!has_rq) {
		qp->rq.max_gs = 0;
		qp->rq.wqe_cnt = 0;
		qp->rq.wqe_shift = 0;
	} else {
		if (ucmd) {
			qp->rq.wqe_cnt = ucmd->rq_wqe_count;
			qp->rq.wqe_shift = ucmd->rq_wqe_shift;
			qp->rq.max_gs = 1;
			qp->rq.max_post = qp->rq.wqe_cnt;
		} else {
			qp->rq.wqe_cnt = roundup_pow_of_two(cap->max_recv_wr);
			qp->rq.wqe_shift = dev->xdev->caps.recv_wqe_shift;
			qp->rq.max_gs = dev->xdev->caps.recv_ds_num;
			qp->rq.max_post = qp->rq.wqe_cnt;
		}
	}

	return 0;
}

static int calc_sq_size(struct xsc_ib_dev *dev, struct ib_qp_init_attr *attr,
			struct xsc_ib_qp *qp)
{
	int wqe_size;
	int wq_size;

	if (!attr->cap.max_send_wr)
		return -1;

	wqe_size = 1 << dev->xdev->caps.send_wqe_shift;
	qp->max_inline_data = (dev->xdev->caps.send_ds_num - 2) *
			      sizeof(struct xsc_wqe_data_seg);
	attr->cap.max_inline_data = qp->max_inline_data;

	qp->sq.wqe_cnt = roundup_pow_of_two(attr->cap.max_send_wr);
	qp->sq.ds_cnt = qp->sq.wqe_cnt <<
			(dev->xdev->caps.send_wqe_shift - XSC_BASE_WQE_SHIFT);
	wq_size = qp->sq.wqe_cnt * wqe_size;
	qp->sq.wqe_shift = ilog2(wqe_size);
	qp->sq.max_gs = dev->xdev->caps.send_ds_num - XSC_CTRL_SEG_NUM - XSC_RADDR_SEG_NUM;
	qp->sq.max_post = qp->sq.wqe_cnt;

	return wq_size;
}

static int qp_has_rq(struct ib_qp_init_attr *attr)
{
	if (attr->qp_type ==
	    IB_QPT_XRC_INI ||
	    attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
	    !attr->cap.max_recv_wr)
		return 0;

	return 1;
}

static enum xsc_qp_state to_xsc_state(enum ib_qp_state state)
{
	switch (state) {
	case IB_QPS_RESET:	return XSC_QP_STATE_RST;
	case IB_QPS_INIT:	return XSC_QP_STATE_INIT;
	case IB_QPS_RTR:	return XSC_QP_STATE_RTR;
	case IB_QPS_RTS:	return XSC_QP_STATE_RTS;
	case IB_QPS_SQD:	return XSC_QP_STATE_SQD;
	case IB_QPS_SQE:	return XSC_QP_STATE_SQER;
	case IB_QPS_ERR:	return XSC_QP_STATE_ERR;
	default:		return -1;
	}
}

static int create_user_qp(struct xsc_ib_dev *dev, struct ib_pd *pd,
			  struct xsc_ib_qp *qp, struct ib_udata *udata,
			  struct xsc_create_qp_mbox_in **in,
			  struct xsc_ib_create_qp_resp *resp, int *inlen)
{
	struct xsc_ib_ucontext *context;
	struct xsc_ib_create_qp ucmd;
	int page_shift;
	int npages;
	u32 offset;
	int ncont;
	int err;
	int hw_npages;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err) {
		xsc_ib_dbg(dev, "copy failed\n");
		return err;
	}

	xsc_ib_dbg(dev, "buf_addr:0x%lx db_addr:0x%lx sq cnt:%u, rq cnt:%u, rq shift:%u\n",
		   (uintptr_t)ucmd.buf_addr, (uintptr_t)ucmd.db_addr,
		   ucmd.sq_wqe_count, ucmd.rq_wqe_count, ucmd.rq_wqe_shift);

	context = to_xucontext(pd->uobject->context);

	qp->sq.ds_cnt = ucmd.sq_wqe_count;
	qp->sq.wqe_cnt = ucmd.sq_wqe_count;
	qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT;
	qp->rq.ds_cnt = ucmd.rq_wqe_count;
	qp->rq.wqe_cnt = ucmd.rq_wqe_count;
	qp->rq.wqe_shift = XSC_BASE_WQE_SHIFT;

	qp->buf_size = (qp->sq.wqe_cnt << qp->sq.wqe_shift) +
		       (qp->rq.wqe_cnt << qp->rq.wqe_shift);
	qp->umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr, qp->buf_size, 0);
	if (IS_ERR(qp->umem)) {
		xsc_ib_dbg(dev, "umem_get failed\n");
		err = PTR_ERR(qp->umem);
		goto err_uuar;
	}

	xsc_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, &ncont, NULL);
	if (ncont != npages) {
		page_shift = PAGE_SHIFT;
		ncont = npages;
	}

	hw_npages = DIV_ROUND_UP(qp->buf_size, PAGE_SIZE_4K);
	err = xsc_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
	if (err) {
		xsc_ib_warn(dev, "bad offset\n");
		goto err_umem;
	}
	xsc_ib_dbg(dev, "npage:%d, page_shift:%d, ncont:%d, offset:%d, hw_npages %d\n",
		   npages, page_shift, ncont, offset, hw_npages);

	*inlen = sizeof(**in) + sizeof(*((*in)->req.pas)) * hw_npages;
	*in = xsc_vzalloc(*inlen);
	if (!*in) {
		err = -ENOMEM;
		goto err_umem;
	}
	xsc_ib_populate_pas(dev, qp->umem, page_shift, (*in)->req.pas, hw_npages, true);
	(*in)->req.pa_num = cpu_to_be16(hw_npages);

	err = ib_copy_to_udata(udata, resp, sizeof(*resp));
	if (err) {
		xsc_ib_dbg(dev, "copy failed\n");
		goto err_umem;
	}
	qp->create_type = XSC_QP_USER;

	return 0;

err_umem:
	ib_umem_release(qp->umem);

err_uuar:
	return err;
}

static void destroy_qp_user(struct ib_pd *pd, struct xsc_ib_qp *qp)
{
	struct xsc_ib_ucontext *context;

	context = to_xucontext(pd->uobject->context);
	ib_umem_release(qp->umem);
}

#define MAX_QP1_SQ_HDR_SIZE_V2	512
#define MAX_QP1_SQ_HDR_SIZE	86
	/* Ethernet header = 14 */
	/* ib_grh = 40 (provided by MAD) */
	/* ib_bth + ib_deth = 20 */
	/* MAD = 256 (provided by MAD) */
	/* iCRC = 4 */
#define MAX_QP1_RQ_HDR_SIZE_V2	512

static int create_kernel_qp(struct xsc_ib_dev *dev, struct ib_qp_init_attr *init_attr,
			    struct xsc_ib_qp *qp, struct xsc_create_qp_mbox_in **in,
			    int *inlen)
{
	int err;
	int sq_size;
	int hw_npages;

	sq_size = calc_sq_size(dev, init_attr, qp);
	if (sq_size < 0) {
		err = -ENOMEM;
		xsc_ib_err(dev, "err %d\n", err);
		return err;
	}

	qp->rq.ds_cnt = qp->rq.wqe_cnt << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT);
	qp->rq.offset = 0;
	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	qp->buf_size = qp->sq.offset + sq_size;
	qp->send_psn = 0;

	err = xsc_buf_alloc(dev->xdev, qp->buf_size, PAGE_SIZE, &qp->buf);
	if (err) {
		xsc_ib_err(dev, "err %d\n", err);
		return err;
	}

	qp->sq.qend = qp->buf.direct.buf + qp->sq.offset + sq_size;

	hw_npages = DIV_ROUND_UP(qp->buf_size, PAGE_SIZE_4K);
	*inlen = sizeof(**in) + sizeof(*(*in)->req.pas) * hw_npages;
	*in = xsc_vzalloc(*inlen);
	if (!*in) {
		err = -ENOMEM;
		goto err_buf;
	}
	xsc_fill_page_array(&qp->buf, (*in)->req.pas, hw_npages);
	(*in)->req.pa_num = cpu_to_be16(hw_npages);

	qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(*qp->sq.wrid), GFP_KERNEL);
	qp->sq.wr_data = kmalloc_array(qp->sq.wqe_cnt, sizeof(*qp->sq.wr_data), GFP_KERNEL);
	qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(*qp->rq.wrid), GFP_KERNEL);
	qp->sq.w_list = kmalloc_array(qp->sq.wqe_cnt, sizeof(*qp->sq.w_list), GFP_KERNEL);
	qp->sq.wqe_head = kmalloc_array(qp->sq.wqe_cnt, sizeof(*qp->sq.wqe_head), GFP_KERNEL);

	if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
	    !qp->sq.w_list || !qp->sq.wqe_head) {
		err = -ENOMEM;
		goto err_wrid;
	}
	qp->create_type = XSC_QP_KERNEL;

	if (init_attr->qp_type == IB_QPT_GSI) {
		qp->sq.mad_index = 0;
		qp->sq.mad_queue_depth = MAD_QUEUE_DEPTH;
		qp->sq.hdr_size = MAX_QP1_SQ_HDR_SIZE_V2 * MAD_QUEUE_DEPTH;
		qp->sq.hdr_buf = dma_alloc_coherent(dev->ib_dev.dma_device,
						    qp->sq.hdr_size,
						    &qp->sq.hdr_dma, GFP_KERNEL);
		if (!qp->sq.hdr_buf) {
			err = -ENOMEM;
			xsc_ib_err(dev, "Failed to create sq_hdr_buf\n");
			goto err_wrid;
		}
	}

	return 0;

err_wrid:
	kfree(qp->sq.wqe_head);
	kfree(qp->sq.w_list);
	kfree(qp->sq.wrid);
	kfree(qp->sq.wr_data);
	kfree(qp->rq.wrid);

err_buf:
	xsc_buf_free(dev->xdev, &qp->buf);
	return err;
}

static void destroy_qp_kernel(struct xsc_ib_dev *dev, struct xsc_ib_qp *qp)
{
	if (qp->sq.hdr_buf)
		dma_free_coherent(dev->ib_dev.dma_device, qp->sq.hdr_size,
				  qp->sq.hdr_buf, qp->sq.hdr_dma);
	kfree(qp->sq.wqe_head);
	kfree(qp->sq.w_list);
	kfree(qp->sq.wrid);
	kfree(qp->sq.wr_data);
	kfree(qp->rq.wrid);
	xsc_buf_free(dev->xdev, &qp->buf);
}

static u8 ib_to_xsc_qp_type(enum ib_qp_type qp_type, __u32 flags)
{
	if (qp_type == IB_QPT_RC) {
		return XSC_QUEUE_TYPE_RDMA_RC;
	} else if ((qp_type == IB_QPT_GSI) || (qp_type == IB_QPT_SMI)) {
		return XSC_QUEUE_TYPE_RDMA_MAD;
	} else if (qp_type == IB_QPT_RAW_PACKET) {
		if (flags & XSC_QP_FLAG_RAWPACKET_TSO)
			return XSC_QUEUE_TYPE_RAW_TSO;
		else if (flags & XSC_QP_FLAG_RAWPACKET_TX)
			return XSC_QUEUE_TYPE_RAW_TX;
		else
			return XSC_QUEUE_TYPE_RAW;
	} else {
		return XSC_QUEUE_TYPE_INVALID;
	}
}

static int create_qp_common(struct xsc_ib_dev *dev, struct ib_pd *pd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata, struct xsc_ib_qp *qp)
{
	struct xsc_ib_resources *devr = &dev->devr;
	struct xsc_ib_create_qp_resp resp;
	struct xsc_create_qp_mbox_in *in = NULL;
	struct xsc_ib_create_qp ucmd = {};	/* flags are consumed below even for kernel QPs */
	int inlen = sizeof(*in);
	int err;

	mutex_init(&qp->mutex);
	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);
	spin_lock_init(&qp->lock);

	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
		qp->sq_signal_bits = XSC_WQE_CTRL_CQ_UPDATE;

	if (pd && pd->uobject) {
		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
			xsc_ib_dbg(dev, "copy failed\n");
			return -EFAULT;
		}
		qp->wq_sig = !!(ucmd.flags & XSC_QP_FLAG_SIGNATURE);
		qp->scat_cqe = !!(ucmd.flags & XSC_QP_FLAG_SCATTER_CQE);
	} else {
		qp->wq_sig = !!wq_signature;
	}
	xsc_ib_dbg(dev, "ucmd.flags=0x%x\n", ucmd.flags);

	qp->has_rq = qp_has_rq(init_attr);

	err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp,
			  (pd && pd->uobject) ?
			  &ucmd : NULL);
	if (err) {
		xsc_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	if (pd) {
		if (pd->uobject) {
			err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
			if (err)
				xsc_ib_dbg(dev, "err %d\n", err);
		} else {
			err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
			if (err)
				xsc_ib_dbg(dev, "err %d\n", err);
			else
				qp->pa_lkey = to_mpd(pd)->pa_lkey;
		}

		if (err)
			return err;
	} else {
		in = xsc_vzalloc(sizeof(*in));
		if (!in)
			return -ENOMEM;

		qp->create_type = XSC_QP_EMPTY;
	}

	xsc_ib_dbg(dev, "[%s:%d]:qp_type=%d\n", __func__, __LINE__, init_attr->qp_type);

	if (is_sqp(init_attr->qp_type))
		qp->port = init_attr->port_num;

	in->req.qp_type = init_attr->qp_type;
	if (is_qp1(init_attr->qp_type))
		in->req.input_qpn = cpu_to_be16(1);

	if (init_attr->qp_type != XSC_IB_QPT_REG_UMR)
		in->req.pdn = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);

	if (qp->rq.ds_cnt)
		in->req.log_rq_sz = ilog2(qp->rq.ds_cnt);

	if (qp->sq.ds_cnt)
		in->req.log_sq_sz = ilog2(qp->sq.ds_cnt);
	else
		in->req.log_sq_sz = ilog2(0x80);

	if (init_attr->send_cq) {
		qp->send_cq = init_attr->send_cq;
		in->req.cqn_send = to_xcq(init_attr->send_cq)->xcq.cqn;
		in->req.cqn_send = cpu_to_be16(in->req.cqn_send);
#ifndef MSIX_SUPPORT
		init_attr->send_cq->comp_handler(init_attr->send_cq,
						 init_attr->send_cq->cq_context);
#endif
	}

	if (init_attr->recv_cq) {
		qp->recv_cq = init_attr->recv_cq;
		in->req.cqn_recv = to_xcq(init_attr->recv_cq)->xcq.cqn;
		in->req.cqn_recv = cpu_to_be16(in->req.cqn_recv);
	}

	in->req.qp_type = ib_to_xsc_qp_type(init_attr->qp_type, ucmd.flags);
	xsc_ib_dbg(dev, "[%s:%d]:req.qp_type=%d\n", __func__, __LINE__, in->req.qp_type);

	if (in->req.qp_type == XSC_QUEUE_TYPE_INVALID) {
		err = -EINVAL;
		goto err_create;
	}

	in->req.glb_funcid = cpu_to_be16(dev->xdev->glb_func_id);
	in->req.logic_port = cpu_to_be16(dev->xdev->logic_port);

	qp->xqp.qp_type_internal = in->req.qp_type;

	err = xsc_core_create_qp(dev->xdev, &qp->xqp, in, inlen);
	if (err) {
		xsc_ib_dbg(dev, "create qp failed\n");
		goto err_create;
	}

	xsc_vfree(in);

	qp->doorbell_qpn = qp->xqp.qpn;

	qp->xqp.event = xsc_ib_qp_event;
	qp->xqp.qp_type = init_attr->qp_type;

	return 0;

err_create:
	if (qp->create_type == XSC_QP_USER)
		destroy_qp_user(pd, qp);
	else if (qp->create_type == XSC_QP_KERNEL)
		destroy_qp_kernel(dev, qp);

	xsc_vfree(in);
	return err;
}

static void xsc_ib_lock_cqs(struct xsc_ib_cq *send_cq, struct xsc_ib_cq *recv_cq)
	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
	if (send_cq) {
		if (recv_cq) {
			if (send_cq->xcq.cqn < recv_cq->xcq.cqn) {
				spin_lock_irq(&send_cq->lock);
				spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
			} else if (send_cq->xcq.cqn == recv_cq->xcq.cqn) {
				spin_lock_irq(&send_cq->lock);
				__acquire(&recv_cq->lock);
			} else {
				spin_lock_irq(&recv_cq->lock);
				spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
			}
		} else {
			spin_lock_irq(&send_cq->lock);
		}
	} else if (recv_cq) {
		spin_lock_irq(&recv_cq->lock);
	}
}

static void xsc_ib_unlock_cqs(struct xsc_ib_cq *send_cq, struct xsc_ib_cq *recv_cq)
	__releases(&send_cq->lock) __releases(&recv_cq->lock)
{
	if (send_cq) {
		if (recv_cq) {
			if (send_cq->xcq.cqn < recv_cq->xcq.cqn) {
				spin_unlock(&recv_cq->lock);
				spin_unlock_irq(&send_cq->lock);
			} else if (send_cq->xcq.cqn == recv_cq->xcq.cqn) {
				__release(&recv_cq->lock);
				spin_unlock_irq(&send_cq->lock);
			} else {
				spin_unlock(&send_cq->lock);
				spin_unlock_irq(&recv_cq->lock);
			}
		} else {
			spin_unlock_irq(&send_cq->lock);
		}
	} else if (recv_cq) {
		spin_unlock_irq(&recv_cq->lock);
	}
}

static struct xsc_ib_pd *get_pd(struct xsc_ib_qp *qp)
{
	return to_mpd(qp->ibqp.pd);
}

static void get_cqs(struct xsc_ib_qp *qp, struct
		    xsc_ib_cq **send_cq, struct xsc_ib_cq **recv_cq)
{
	switch (qp->ibqp.qp_type) {
	case IB_QPT_XRC_TGT:
		*send_cq = NULL;
		*recv_cq = NULL;
		break;
	case XSC_IB_QPT_REG_UMR:
	case IB_QPT_XRC_INI:
		*send_cq = to_xcq(qp->ibqp.send_cq);
		*recv_cq = NULL;
		break;
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_RC:
	case IB_QPT_UC:
	case IB_QPT_UD:
	case IB_QPT_RAW_IPV6:
	case IB_QPT_RAW_ETHERTYPE:
		*send_cq = to_xcq(qp->ibqp.send_cq);
		*recv_cq = to_xcq(qp->ibqp.recv_cq);
		break;
	case IB_QPT_RAW_PACKET:
	case IB_QPT_MAX:
	default:
		*send_cq = NULL;
		*recv_cq = NULL;
		break;
	}
}

static void destroy_qp_common(struct xsc_ib_dev *dev, struct xsc_ib_qp *qp)
{
	struct xsc_ib_cq *send_cq, *recv_cq;
	struct xsc_modify_qp_mbox_in *in;
	int err;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return;

	if (qp->xqp.qp_type_internal == XSC_QUEUE_TYPE_RAW ||
	    qp->state != IB_QPS_RESET)
		if (xsc_core_qp_modify(dev->xdev, to_xsc_state(qp->state),
				       XSC_QP_STATE_RST, in, sizeof(*in), &qp->xqp))
			xsc_ib_warn(dev, "modify QP %06x to RESET failed\n", qp->xqp.qpn);

	get_cqs(qp, &send_cq, &recv_cq);

	if (qp->create_type == XSC_QP_KERNEL) {
		xsc_ib_lock_cqs(send_cq, recv_cq);
		__xsc_ib_cq_clean(recv_cq, qp->xqp.qpn);
		if (send_cq != recv_cq)
			__xsc_ib_cq_clean(send_cq, qp->xqp.qpn);
		xsc_ib_unlock_cqs(send_cq, recv_cq);
	}

	err = xsc_core_destroy_qp(dev->xdev, &qp->xqp);
	if (err)
		xsc_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->xqp.qpn);
	kfree(in);

	if (qp->create_type == XSC_QP_KERNEL)
		destroy_qp_kernel(dev, qp);
	else if (qp->create_type == XSC_QP_USER)
		destroy_qp_user(&get_pd(qp)->ibpd, qp);
}

static const char *ib_qp_type_str(enum ib_qp_type type)
{
	switch (type) {
	case IB_QPT_SMI:		return "IB_QPT_SMI";
	case IB_QPT_GSI:		return "IB_QPT_GSI";
	case IB_QPT_RC:			return "IB_QPT_RC";
	case IB_QPT_UC:			return "IB_QPT_UC";
	case IB_QPT_UD:			return "IB_QPT_UD";
	case IB_QPT_RAW_IPV6:		return "IB_QPT_RAW_IPV6";
	case IB_QPT_RAW_ETHERTYPE:	return "IB_QPT_RAW_ETHERTYPE";
	case IB_QPT_XRC_INI:		return "IB_QPT_XRC_INI";
	case IB_QPT_XRC_TGT:		return "IB_QPT_XRC_TGT";
	case IB_QPT_RAW_PACKET:		return "IB_QPT_RAW_PACKET";
	case XSC_IB_QPT_REG_UMR:	return "XSC_IB_QPT_REG_UMR";
	case IB_QPT_MAX:
	default:			return "Invalid QP type";
	}
}

int xsc_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
		     struct ib_udata *udata)
{
	struct xsc_ib_dev *dev;
	struct xsc_ib_qp *qp;
	struct ib_pd *pd = ibqp->pd;
	int err;

	qp = to_xqp(ibqp);

	if (pd) {
		dev = to_mdev(pd->device);
	} else {
		/* being cautious here */
		if (init_attr->qp_type != IB_QPT_XRC_TGT &&
		    init_attr->qp_type != XSC_IB_QPT_REG_UMR) {
			pr_warn("%s: no PD for transport %s\n", __func__,
				ib_qp_type_str(init_attr->qp_type));
			return RET_VALUE(-EINVAL);
		}
		dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
	}

	switch (init_attr->qp_type) {
	case IB_QPT_RC:
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_RAW_PACKET:
		err = create_qp_common(dev, pd, init_attr, udata, qp);
		if (err) {
			/* qp is embedded in the core-allocated ibqp, do not free it here */
			xsc_ib_dbg(dev, "create_qp_common failed\n");
			return RET_VALUE(err);
		}

		if (is_qp0(init_attr->qp_type)) {
			qp->ibqp.qp_num = 0;
		} else if (is_qp1(init_attr->qp_type)) {
			qp->ibqp.qp_num = 1;
			dev->xdev->gsi_qpn = qp->xqp.qpn;
		} else {
			qp->ibqp.qp_num = qp->xqp.qpn;
		}

		xsc_ib_dbg(dev, "ib qpnum 0x%x, qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
			   qp->ibqp.qp_num, qp->xqp.qpn,
			   to_xcq(init_attr->recv_cq)->xcq.cqn,
			   to_xcq(init_attr->send_cq)->xcq.cqn);
		break;
	case IB_QPT_RAW_IPV6:
	case IB_QPT_RAW_ETHERTYPE:
	case IB_QPT_MAX:
	default:
		xsc_ib_dbg(dev, "unsupported qp type %d\n", init_attr->qp_type);
		/* Don't support raw QPs */
		return RET_VALUE(-EINVAL);
	}

	return 0;
}
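/*
 * QP destroy verb. xsc_ib_destroy_qp_def() appears to be a compatibility
 * macro (defined elsewhere in the driver) that expands to the ib_destroy_qp
 * handler signature and supplies the 'qp' argument used in the body below.
 */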
xsc_ib_destroy_qp_def()
{
	struct xsc_ib_dev *dev = to_mdev(qp->device);
	struct xsc_ib_qp *xqp = to_xqp(qp);

	destroy_qp_common(dev, xqp);

	return 0;
}

static inline u16 xsc_calc_udp_sport(u32 lqpn, u32 rqpn)
{
	unsigned char *p;
	u8 ports[2];
	u16 sport;
	u64 tqpn;

	/* XOR-fold the product of the two QPNs and force the result into
	 * the 0xC000-0xFFFF range used for dynamic source ports.
	 */
	tqpn = ((u64)(lqpn & 0xffffff)) * ((u64)(rqpn & 0xffffff));
	p = (unsigned char *)&tqpn;
	ports[0] = p[0] ^ p[2] ^ p[4];
	ports[1] = p[1] ^ p[3] ^ p[5];
	sport = *((u16 *)ports) | 0xC000;

	return sport;
}

static inline void xsc_path_set_udp_sport(struct xsc_qp_path *path,
					  const struct rdma_ah_attr *ah,
					  u32 lqpn, u32 rqpn)
{
	if ((ah->grh.flow_label & UDP_SPORT_MASK) != 0) {
		if ((ah->grh.flow_label & UDP_SPORT_MASK_EN) == 0)
			path->sport = cpu_to_be16(xsc_flow_label_to_udp_sport(ah->grh.flow_label));
		else
			path->sport = cpu_to_be16((ah->grh.flow_label & UDP_SPORT_MASK) >>
						  UDP_SPORT_OFFSET);
	} else {
		path->sport = cpu_to_be16(xsc_calc_udp_sport(lqpn, rqpn));
	}
}

static int xsc_set_path(struct xsc_ib_dev *dev, const struct rdma_ah_attr *ah,
			struct xsc_qp_path *path, u8 port, int attr_mask,
			u32 path_flags, const struct ib_qp_attr *attr,
			struct xsc_ib_qp *qp)
{
	struct ib_global_route *grh = rdma_ah_retrieve_grh((struct rdma_ah_attr *)ah);
	union ib_gid *dgid = &grh->dgid;
	const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
	union ib_gid *sgid = &((struct ib_gid_attr *)sgid_attr)->gid;
	union {
		struct sockaddr		_sockaddr;
		struct sockaddr_in	_sockaddr_in;
		struct sockaddr_in6	_sockaddr_in6;
	} sgid_addr, dgid_addr;
	int global_pcp, global_dscp;

	if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
		if (!(rdma_ah_get_ah_flags(ah) & IB_AH_GRH))
			return -EINVAL;

		if (qp->ibqp.qp_type == IB_QPT_RC ||
		    qp->ibqp.qp_type == IB_QPT_UC ||
		    qp->ibqp.qp_type == IB_QPT_XRC_INI ||
		    qp->ibqp.qp_type == IB_QPT_XRC_TGT)
			xsc_path_set_udp_sport(path, ah, qp->ibqp.qp_num,
					       attr->dest_qp_num);

		if (sgid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) {
			xsc_ib_err(dev, "gid type not ROCEv2\n");
			return -EINVAL;
		}

		global_dscp = get_global_force_dscp();
		if (global_dscp == GLOBAL_UNSET_FORCE_VALUE)
			path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
		else
			path->ecn_dscp = global_dscp;
		path->hop_limit = grh->hop_limit;

		rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
		rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid);

		if (sgid_addr._sockaddr.sa_family == AF_INET &&
		    dgid_addr._sockaddr.sa_family == AF_INET) {
			memcpy(path->sip, &sgid_addr._sockaddr_in.sin_addr.s_addr,
			       sizeof(struct in_addr));
			memcpy(path->dip, &dgid_addr._sockaddr_in.sin_addr.s_addr,
			       sizeof(struct in_addr));
			path->af_type = AF_INET;
		} else if (sgid_addr._sockaddr.sa_family == AF_INET6 &&
			   dgid_addr._sockaddr.sa_family == AF_INET6) {
			memcpy(path->sip, &sgid_addr._sockaddr_in6.sin6_addr.s6_addr,
			       sizeof(path->sip));
			memcpy(path->dip, &dgid_addr._sockaddr_in6.sin6_addr.s6_addr,
			       sizeof(path->dip));
			path->af_type = AF_INET6;
		} else {
			return -EINVAL;
		}

		/* the device netdev MAC overrides the MAC of the GID entry's netdev */
		ether_addr_copy(path->smac, sgid_attr->ndev->dev_addr);
		ether_addr_copy(path->smac, dev->netdev->dev_addr);
		memcpy(path->dmac, ah->roce.dmac, sizeof(ah->roce.dmac));

		if (is_vlan_dev(sgid_attr->ndev)) {
			path->vlan_valid = 1;
			path->vlan_id = cpu_to_be16(vlan_dev_vlan_id(sgid_attr->ndev));

			global_pcp = get_global_force_pcp();
			if (global_pcp == GLOBAL_UNSET_FORCE_VALUE)
				path->dci_cfi_prio_sl = (ah->sl & 0x7);
			else
				path->dci_cfi_prio_sl = global_pcp;
		} else {
			path->vlan_valid = 0;
		}
	}

	xsc_core_dbg(dev->xdev, "path dscp %d pcp %d\n",
		     path->ecn_dscp, path->dci_cfi_prio_sl);

	return 0;
}

static int __xsc_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
			      int attr_mask, enum ib_qp_state
			      cur_state, enum ib_qp_state new_state)
{
	struct xsc_ib_dev *dev = to_mdev(ibqp->device);
	struct xsc_ib_qp *qp = to_xqp(ibqp);
	struct xsc_ib_cq *send_cq, *recv_cq;
	struct xsc_qp_context *context;
	struct xsc_modify_qp_mbox_in *in;
	struct xsc_qp_path path;
	int sqd_event;
	int err;
	struct xsc_lag *ldev = xsc_lag_dev_get(dev->xdev);
	u8 lag_port_num = ARRAY_SIZE(ldev->pf);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	context = &qp->ctx;

	if (attr_mask & IB_QP_PATH_MTU) {
		if (attr->path_mtu != IB_MTU_1024 &&
		    attr->path_mtu != IB_MTU_4096) {
			xsc_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
		}
		context->mtu_mode = (attr->path_mtu <= IB_MTU_1024) ? 0 : 1;
	}

	if (attr_mask & IB_QP_DEST_QPN)
		context->remote_qpn = cpu_to_be32(attr->dest_qp_num);

	if (attr_mask & IB_QP_AV) {
		err = xsc_set_path(dev, &attr->ah_attr, &path,
				   attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
				   attr_mask, 0, attr, qp);
		if (err)
			goto out;

		context->src_udp_port = path.sport;
		context->dscp = path.ecn_dscp;
		context->hop_limit = path.hop_limit;
		context->ip_type = (path.af_type == AF_INET ? 0 : 1);
		context->ip_type = cpu_to_be16(context->ip_type);
		memcpy(context->dip, path.dip, sizeof(context->dip));
		memcpy(context->sip, path.sip, sizeof(context->sip));
		memcpy(context->dmac, path.dmac, sizeof(path.dmac));
		memcpy(context->smac, path.smac, sizeof(path.smac));

		context->vlan_valid = path.vlan_valid;
		context->dci_cfi_prio_sl = path.dci_cfi_prio_sl;
		context->vlan_id = path.vlan_id;

		if (ldev && __xsc_lag_is_roce(ldev)) {
			context->lag_id = cpu_to_be16(ldev->lag_id);
			context->lag_sel_en = 1;
			if ((attr->ah_attr.grh.flow_label & LAG_PORT_NUM_MASK_EN) != 0)
				context->lag_sel = ((attr->ah_attr.grh.flow_label &
						     LAG_PORT_NUM_MASK) >>
						    LAG_PORT_NUM_OFFSET) % lag_port_num;
			else
				context->lag_sel = qp->xqp.qpn % XSC_MAX_PORTS;
		}
	}

	if (attr_mask & IB_QP_RNR_RETRY)
		context->rnr_retry = attr->rnr_retry;

	if (attr_mask & IB_QP_RETRY_CNT)
		context->retry_cnt = attr->retry_cnt;

	if (attr_mask & IB_QP_SQ_PSN)
		context->next_send_psn = cpu_to_be32(attr->sq_psn);

	if (attr_mask & IB_QP_RQ_PSN)
		context->next_recv_psn = cpu_to_be32(attr->rq_psn);

	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
		sqd_event = 1;
	else
		sqd_event = 0;

	memcpy(&in->ctx, context, sizeof(*context));
	err = xsc_core_qp_modify(dev->xdev, to_xsc_state(cur_state),
				 to_xsc_state(new_state), in, sqd_event, &qp->xqp);
	if (err)
		goto out;

	qp->state = new_state;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->atomic_rd_en = attr->qp_access_flags;
	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->resp_depth = attr->max_dest_rd_atomic;
	if (attr_mask & IB_QP_PORT)
		qp->port = attr->port_num;
	if (attr_mask & IB_QP_ALT_PATH)
		qp->alt_port = attr->alt_port_num;

	/*
	 * If we moved a kernel QP to RESET, clean up all old CQ
	 * entries and reinitialize the QP.
	 */
	if (new_state == IB_QPS_RESET && !ibqp->uobject) {
		get_cqs(qp, &send_cq, &recv_cq);

		xsc_ib_cq_clean(recv_cq, qp->xqp.qpn);
		if (send_cq != recv_cq)
			xsc_ib_cq_clean(send_cq, qp->xqp.qpn);

		qp->rq.head = 0;
		qp->rq.tail = 0;
		qp->sq.head = 0;
		qp->sq.tail = 0;
		qp->sq.cur_post = 0;
		qp->sq.last_poll = 0;
	}

out:
	kfree(in);
	return err;
}

int xsc_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
		     struct ib_udata *udata)
{
	struct xsc_ib_dev *dev = to_mdev(ibqp->device);
	struct xsc_ib_qp *qp = to_xqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	int err = -EINVAL;

	if (!is_support_rdma(dev->xdev)) {
		xsc_ib_dbg(dev, "rdma unsupported,%s no action.\n", __func__);
		return 0;
	}

	mutex_lock(&qp->mutex);

	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
	xsc_ib_dbg(dev, "cur_state:%u, new_state:%u attr_mask:0x%x\n",
		   cur_state, new_state, attr_mask);

	if ((attr_mask & IB_QP_PORT) &&
	    (attr->port_num == 0 || attr->port_num > dev->xdev->caps.num_ports)) {
		xsc_ib_dbg(dev, "error port num\n");
		goto out;
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
	    attr->max_rd_atomic > dev->xdev->caps.max_ra_res_qp) {
		xsc_ib_err(dev, "rd atomic:%u exceeded\n", attr->max_rd_atomic);
		goto out;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
	    attr->max_dest_rd_atomic > dev->xdev->caps.max_ra_req_qp) {
		xsc_ib_err(dev, "dest rd atomic:%u exceeded\n",
			   attr->max_dest_rd_atomic);
		goto out;
	}

	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
		err = 0;
		goto out;
	}

	err = __xsc_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);

out:
	mutex_unlock(&qp->mutex);
	return err;
}

static int xsc_wq_overflow(struct xsc_ib_wq *wq, int nreq, struct xsc_ib_cq *cq)
{
	unsigned int cur;

	cur = wq->head - wq->tail;
	if (likely(cur + nreq < wq->max_post))
		return 0;

	spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	spin_unlock(&cq->lock);

	return cur + nreq >= wq->max_post;
}

#ifdef XSC_DEBUG
static void dump_wqe(struct xsc_ib_qp *qp, int idx)
{
	struct xsc_send_wqe_ctrl_seg *seg;
	struct xsc_ib_dev *dev = to_mdev(qp->ibqp.device);
	u32 *p = NULL;
	int i;

	seg = xsc_get_send_wqe(qp, idx);
	xsc_ib_dbg(dev, "current wqe:%p index:%d\n", seg, idx);
	for (i = 0; i < 4; i++) {
		p = (u32 *)get_seg_wqe(seg, i);
		xsc_ib_dbg(dev, "%08x %08x %08x %08x\n", p[0], p[1], p[2], p[3]);
	}
}
#endif

static inline void xsc_post_send_db(struct xsc_ib_qp *qp,
				    struct xsc_core_device *xdev, int nreq)
{
	u16 next_pid;
	union xsc_db_data db;

	if (unlikely(!nreq))
		return;

	qp->sq.head += nreq;

	next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
	db.sq_next_pid = next_pid;
	db.sqn = qp->doorbell_qpn;
	/*
	 * Make sure that descriptors are written before
	 * updating doorbell record and ringing the doorbell
	 */
	wmb();
	writel(db.raw_data, REG_ADDR(xdev, xdev->regs.tx_db));
}

static inline u32 xsc_crc32(struct xsc_ib_dev *dev, u32 crc, u8 *buf, size_t len)
{
	u32 i;

	for (i = 0; i < len; i++)
		crc = dev->crc_32_table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);

	return crc;
}

#define BTH_QPN_MASK	(0x00ffffff)
#define BTH_PSN_MASK	(0x00ffffff)
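/*
 * ICRC note: xsc_icrc_hdr() below uses the byte-wise, table-driven
 * xsc_crc32() above (dev->crc_32_table). The pseudo-header it builds
 * follows the RoCE invariant-CRC rules: fields that may change in flight
 * (IPv4 TTL/TOS/checksum or IPv6 priority/flow label/hop limit, the UDP
 * checksum and BTH.resv8a) are forced to all ones before the CRC is
 * taken. Only the L3/L4/BTH/DETH headers are covered here; the MAD send
 * path continues the same CRC over the payload and appends the inverted
 * result.
 */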
/* Compute a partial ICRC for all the IB transport headers. */
u32 xsc_icrc_hdr(struct xsc_ib_dev *dev, void *pkt, u32 size, u32 *icrc)
{
	struct iphdr *ip4h = NULL;
	struct ipv6hdr *ip6h = NULL;
	struct udphdr *udph;
	struct ib_unpacked_eth *eth;
	struct rxe_bth *bth;
	struct ib_unpacked_deth *deth;
	struct ib_unpacked_vlan *vlan;
	u32 crc;
	int crc_field_len;
	__be16 l3_type;
	u8 *l3_start;
	int hdr_size;
	/* The pseudo-header buffer size is calculated from the ipv6 header
	 * size since it is bigger than the ipv4 one.
	 */
	u8 pshdr[sizeof(struct udphdr) + sizeof(struct ipv6hdr) +
		 sizeof(*bth) + sizeof(*deth)];

	eth = pkt;
	if (eth->type == htons(ETH_P_8021Q)) {
		vlan = (struct ib_unpacked_vlan *)(eth + 1);
		l3_type = vlan->type;
		l3_start = (u8 *)(vlan + 1);
		size -= 4;
	} else {
		l3_type = eth->type;
		l3_start = (u8 *)(eth + 1);
	}

	hdr_size = sizeof(struct udphdr) +
		   (l3_type == htons(ETH_P_IP) ?
		    sizeof(struct iphdr) : sizeof(struct ipv6hdr));

	crc_field_len = hdr_size + sizeof(*bth) + sizeof(*deth);
	if (crc_field_len != size) {
		xsc_ib_err(dev, "Unmatched hdr: expect %d actual %d\n",
			   crc_field_len, size);
		return -EINVAL;
	}

	ip4h = (struct iphdr *)(l3_start);
	ip6h = (struct ipv6hdr *)(l3_start);
	udph = (struct udphdr *)(ip4h + 1);
	bth = (struct rxe_bth *)(udph + 1);

	memcpy(pshdr, l3_start, crc_field_len);

	/* This seed is the result of computing a CRC with a seed of
	 * 0xffffffff and 8 bytes of 0xff representing a masked LRH.
	 */
	crc = 0xdebb20e3;

	if (l3_type == htons(ETH_P_IP)) { /* IPv4 */
		memcpy(pshdr, ip4h, hdr_size);
		ip4h = (struct iphdr *)pshdr;
		udph = (struct udphdr *)(ip4h + 1);

		ip4h->ttl = 0xff;
		ip4h->check = CSUM_MANGLED_0;
		ip4h->tos = 0xff;
	} else { /* IPv6 */
		memcpy(pshdr, ip6h, hdr_size);
		ip6h = (struct ipv6hdr *)pshdr;
		udph = (struct udphdr *)(ip6h + 1);

		memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl));
		ip6h->priority = 0xf;
		ip6h->hop_limit = 0xff;
	}
	udph->check = CSUM_MANGLED_0;

	bth = (struct rxe_bth *)(udph + 1);
	/* exclude bth.resv8a */
	bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);

	*icrc = xsc_crc32(dev, crc, pshdr, crc_field_len);

	return 0;
}

/* Routine for sending QP1 packets for RoCE V1 and V2 */
int build_qp1_send_v2(struct xsc_ib_dev *dev, struct xsc_ib_qp *qp,
		      const struct ib_send_wr *wr, struct ib_sge *sge,
		      int payload_size, u32 *crc)
{
	struct xsc_ib_ah *ah = container_of(ud_wr((struct ib_send_wr *)wr)->ah,
					    struct xsc_ib_ah, ibah);
	const struct ib_gid_attr *sgid_attr = ah->ibah.sgid_attr;
	u16 ether_type;
	union ib_gid dgid;
	bool is_eth = false;
	bool is_vlan = false;
	bool is_grh = false;
	bool is_udp = false;
	u8 ip_version = 0;
	u16 vlan_id = 0xFFFF;
	int rc = 0;
	int cm_pcp = 0;
	void *hdr_buf;

	memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));

	if (!qp->sq.hdr_buf) {
		xsc_ib_err(dev, "QP1 buffer is empty!\n");
		return -ENOMEM;
	}
	hdr_buf = (u8 *)qp->sq.hdr_buf + MAX_QP1_SQ_HDR_SIZE_V2 * qp->sq.mad_index;

	if (!sgid_attr || !sgid_attr->ndev) {
		xsc_ib_err(dev, "sgid_attr or ndev is null\n");
		return -ENXIO;
	}

	if (is_vlan_dev(sgid_attr->ndev))
		vlan_id = vlan_dev_vlan_id(sgid_attr->ndev);

	is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
	memcpy(&dgid.raw, &ah->av.rgid, 16);
	if (is_udp) {
		if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
			ip_version = 4;
			ether_type = ETH_P_IP;
		} else {
			ip_version = 6;
			ether_type = ETH_P_IPV6;
		}
		is_grh = false;
	} else {
		ether_type = ETH_P_IBOE;
		is_grh = true;
	}

	is_eth = true;
	is_vlan = (vlan_id && (vlan_id < 0x1000)) ?
		  true : false;

	ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh,
			  ip_version, is_udp, 0, &qp->qp1_hdr);

	/* ETH */
	ether_addr_copy(qp->qp1_hdr.eth.dmac_h, ah->av.rmac);
	ether_addr_copy(qp->qp1_hdr.eth.smac_h, dev->netdev->dev_addr);

	/* For vlan, check the sgid for vlan existence */
	if (!is_vlan) {
		qp->qp1_hdr.eth.type = cpu_to_be16(ether_type);
	} else {
		if (dev->cm_pcp != DSCP_PCP_UNSET)
			cm_pcp = dev->cm_pcp << 13;
		else
			cm_pcp = (iboe_tos_to_sl(sgid_attr->ndev, ah->av.tclass) << 13);
		qp->qp1_hdr.vlan.type = cpu_to_be16(ether_type);
		qp->qp1_hdr.vlan.tag = cpu_to_be16(vlan_id | cm_pcp);
	}

	if (ip_version == 4) {
		if (dev->cm_dscp != DSCP_PCP_UNSET)
			qp->qp1_hdr.ip4.tos = dev->cm_dscp << 2;
		else
			qp->qp1_hdr.ip4.tos = 0;
		qp->qp1_hdr.ip4.id = 0;
		qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
		qp->qp1_hdr.ip4.ttl = ah->av.hop_limit;

		memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4);
		memcpy(&qp->qp1_hdr.ip4.daddr, ah->av.rgid + 12, 4);
		qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
	}

	if (is_udp) {
		qp->qp1_hdr.udp.dport = htons(ROCE_V2_UDP_DPORT);
		qp->qp1_hdr.udp.sport = htons(ah->av.udp_sport);
		qp->qp1_hdr.udp.csum = 0;
		xsc_ib_dbg(dev, "CM packet used udp_sport=%d\n", ah->av.udp_sport);
	}

	/* BTH */
	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
		qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		qp->qp1_hdr.immediate_present = 1;
	} else {
		qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
	}
	if (wr->send_flags & IB_SEND_SOLICITED)
		qp->qp1_hdr.bth.solicited_event = 1;
	/* pad_count */
	qp->qp1_hdr.bth.pad_count = (4 - payload_size) & 3;

	/* P_key for QP1 is for all members */
	qp->qp1_hdr.bth.pkey = cpu_to_be16(0xFFFF);
	qp->qp1_hdr.bth.destination_qpn = IB_QP1;
	qp->qp1_hdr.bth.ack_req = 0;
	qp->send_psn++;
	qp->send_psn &= BTH_PSN_MASK;
	qp->qp1_hdr.bth.psn = cpu_to_be32(qp->send_psn);
	/* DETH */
	/* Use the privileged Q_Key for QP1 */
	qp->qp1_hdr.deth.qkey = cpu_to_be32(IB_QP1_QKEY);
	qp->qp1_hdr.deth.source_qpn = IB_QP1;

	/* Pack the QP1 header into the transmit buffer */
	sge->addr = (dma_addr_t)(qp->sq.hdr_dma +
				 MAX_QP1_SQ_HDR_SIZE_V2 * qp->sq.mad_index);
	sge->lkey = 0xFFFFFFFF;
	sge->length = MAX_QP1_SQ_HDR_SIZE;

	ib_ud_header_pack(&qp->qp1_hdr, hdr_buf);
	/*
	 * Max Header buf size for IPV6 RoCE V2 is 86,
	 * which is the same as the QP1 SQ header buffer.
	 * Header buf size for IPV4 RoCE V2 can be 66.
	 * ETH(14) + VLAN(4) + IP(20) + UDP(8) + BTH(20).
	 * Subtract 20 bytes from QP1 SQ header buf size
	 */
	if (is_udp && ip_version == 4)
		sge->length -= 20;
	/*
	 * Max Header buf size for RoCE V1 is 78.
	 * ETH(14) + VLAN(4) + GRH(40) + BTH(20).
	 * Subtract 8 bytes from QP1 SQ header buf size
	 */
	if (!is_udp)
		sge->length -= 8;

	/* Subtract 4 bytes for non vlan packets */
	if (!is_vlan)
		sge->length -= 4;

	rc = xsc_icrc_hdr(dev, hdr_buf,
			  sge->length - sizeof(struct ib_unpacked_eth), crc);
	if (rc) {
		xsc_ib_err(dev, "CRC error: hdr size %zu\n",
			   sge->length - sizeof(struct ib_unpacked_eth));
	}

	return rc;
}

static void zero_send_ds(struct xsc_ib_qp *qp, int idx)
{
	void *seg;
	int i;
	int ds_num;
	u64 *p;

	ds_num = XSC_SEND_SEG_NUM << (qp->sq.wqe_shift - XSC_SEND_WQE_SHIFT);
	seg = (void *)xsc_get_send_wqe(qp, idx);
	for (i = 1; i < ds_num; i++) {
		p = get_seg_wqe(seg, i);
		p[0] = 0;
		p[1] = 0;
	}
}

int xsc_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
		     const struct ib_send_wr **bad_wr)
{
	struct xsc_ib_qp *qp = to_xqp(ibqp);
	struct xsc_ib_dev *dev = to_mdev(ibqp->device);
	void *seg;
	struct xsc_send_wqe_ctrl_seg *ctrl;
	struct xsc_wqe_data_seg *data_seg;
	u32 crc;
	int nreq;
	int err = 0;
	int i;
	unsigned int idx;
	unsigned long irqflag = 0;
	struct ib_sge sg;
	u8 *cur_p = NULL;
	u8 *mad_send_base = NULL;
	struct ib_wc wc;
	unsigned long qp_irqflag = 0;

	if (!is_support_rdma(dev->xdev)) {
		xsc_ib_dbg(dev, "rdma unsupported,%s no action.\n", __func__);
		return 0;
	}

	if (wr->opcode == IB_WR_LOCAL_INV) {
		spin_lock_irqsave(&qp->lock, qp_irqflag);
		wc.status = IB_WC_SUCCESS;
		wc.wr_cqe = wr->wr_cqe;
		if (xsc_wr_invalidate_mr(dev, wr))
			wc.status = IB_WC_GENERAL_ERR;
		spin_unlock_irqrestore(&qp->lock, qp_irqflag);

		if (wr->wr_cqe && wr->wr_cqe->done)
			wr->wr_cqe->done(qp->send_cq, &wc);
		return 0;
	}

	if (wr->opcode == IB_WR_REG_MR) {
		spin_lock_irqsave(&qp->lock, qp_irqflag);
		wc.status = IB_WC_SUCCESS;
		if (xsc_wr_reg_mr(dev, wr))
			wc.status = IB_WC_GENERAL_ERR;
		if (wr->wr_cqe && wr->wr_cqe->done)
			wr->wr_cqe->done(qp->send_cq, &wc);
		spin_unlock_irqrestore(&qp->lock, qp_irqflag);
	}

	spin_lock_irqsave(&qp->sq.lock, irqflag);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		unsigned int seg_index = 1;
		unsigned int msg_len = 0;
		struct ib_sge *sgl = &wr->sg_list[0];
		int sg_n = wr->num_sge;

		if (unlikely(wr->opcode < 0 ||
			     wr->opcode >= ARRAY_SIZE(xsc_ib_opcode))) {
			xsc_ib_err(dev, "bad opcode %d\n", wr->opcode);
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(xsc_wq_overflow(&qp->sq, nreq,
					     to_xcq(qp->ibqp.send_cq)))) {
			xsc_ib_err(dev, "send work queue overflow\n");
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
			xsc_ib_err(dev, "max gs exceeded %d (max = %d)\n",
				   wr->num_sge, qp->sq.max_gs);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->opcode == IB_WR_RDMA_READ && wr->num_sge > 1)) {
			xsc_ib_err(dev, "rdma read, max gs exceeded %d (max = 1)\n",
				   wr->num_sge);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
		zero_send_ds(qp, idx);
		seg = xsc_get_send_wqe(qp, idx);
		ctrl = seg;
		ctrl->wqe_id = cpu_to_le16(qp->sq.cur_post <<
					   (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT));
		ctrl->ds_data_num = 0;
		ctrl->se = wr->send_flags & IB_SEND_SOLICITED ? 1 : 0;
		ctrl->ce = wr->send_flags & IB_SEND_SIGNALED ?
			   1 : 0;

		for (i = 0; i < wr->num_sge; ++i) {
			if (likely(wr->sg_list[i].length))
				msg_len += wr->sg_list[i].length;
		}
		ctrl->msg_len = msg_len;
		ctrl->with_immdt = 0;

		switch (ibqp->qp_type) {
		case IB_QPT_RC:
			ctrl->ds_data_num = wr->num_sge;
			switch (wr->opcode) {
			case IB_WR_SEND:
				break;
			case IB_WR_SEND_WITH_IMM:
				ctrl->with_immdt = 1;
				ctrl->opcode_data = send_ieth(wr);
				break;
			case IB_WR_RDMA_WRITE_WITH_IMM:
				ctrl->with_immdt = 1;
				ctrl->opcode_data = send_ieth(wr);
				fallthrough;
			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_WRITE:
				ctrl->ds_data_num++;
				data_seg = get_seg_wqe(ctrl, seg_index);
				set_remote_addr_seg(data_seg, msg_len,
						    rdma_wr(wr)->remote_addr,
						    rdma_wr(wr)->rkey);
				seg_index++;
				break;
			case IB_WR_REG_MR:
				break;
			default:
				xsc_ib_err(dev, "debug: opcode:%u NOT supported\n",
					   wr->opcode);
				err = -EPERM;
				*bad_wr = wr;
				goto out;
			}
			ctrl->msg_opcode = xsc_ib_opcode[wr->opcode];
			break;
		case IB_QPT_UD:
		case IB_QPT_GSI:
			xsc_ib_dbg(dev, "send MAD packet\n");
			ctrl->msg_opcode = XSC_MSG_OPCODE_MAD;
			ctrl->ds_data_num++;
			data_seg = get_seg_wqe(ctrl, seg_index);
			mad_send_base = (u8 *)qp->sq.hdr_buf +
					MAX_QP1_SQ_HDR_SIZE_V2 * qp->sq.mad_index;

			build_qp1_send_v2(dev, qp, wr, &sg, msg_len, &crc);

			cur_p = mad_send_base + sg.length;
			for (i = 0; i < wr->num_sge; ++i) {
				if (likely(wr->sg_list[i].length))
					memcpy(cur_p,
					       phys_to_virt(dma_to_phys(dev->ib_dev.dma_device,
									wr->sg_list[i].addr)),
					       wr->sg_list[i].length);
				cur_p += wr->sg_list[i].length;
			}
			crc = xsc_crc32(dev, crc, mad_send_base + sg.length, ctrl->msg_len);
			ctrl->msg_len += sg.length;
			seg_index++;

			*(u32 *)&mad_send_base[ctrl->msg_len] = ~crc;
			ctrl->msg_len += sizeof(crc);

			sg.length = ctrl->msg_len;
			set_local_data_seg(data_seg, &sg);
			xsc_ib_dbg(dev, "msg_len:%d\n", ctrl->msg_len);

			qp->sq.mad_index = (qp->sq.mad_index + 1) % MAD_QUEUE_DEPTH;

			sg_n = 0;
			break;
		default:
			xsc_ib_err(dev, "qp type:%u NOT supported\n", ibqp->qp_type);
			err = -EPERM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->opcode == IB_WR_REG_MR) {
			nreq--;
			continue;
		}

		if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
			err = set_data_inl_seg(qp, wr, ctrl);
			if (unlikely(err)) {
				*bad_wr = wr;
				xsc_ib_err(dev, "inline layout failed, err %d\n", err);
				goto out;
			}
		} else {
			for (i = 0; i < sg_n; ++i, ++seg_index) {
				if (likely(sgl[i].length)) {
					data_seg = get_seg_wqe(ctrl, seg_index);
					set_local_data_seg(data_seg, &sgl[i]);
				}
			}
		}

		qp->sq.wrid[idx] = wr->wr_id;
		qp->sq.wqe_head[idx] = qp->sq.head + nreq;
		qp->sq.cur_post += 1;
#ifdef XSC_DEBUG
		dump_wqe(qp, idx);
#endif
	}

out:
	xsc_ib_dbg(dev, "nreq:%d\n", nreq);
	xsc_post_send_db(qp, dev->xdev, nreq);
	spin_unlock_irqrestore(&qp->sq.lock, irqflag);

	return err;
}

int xsc_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
		     const struct ib_recv_wr **bad_wr)
{
	struct xsc_ib_qp *qp = to_xqp(ibqp);
	struct xsc_ib_dev *dev = to_mdev(ibqp->device);
	struct xsc_core_device *xdev = dev->xdev;
	struct xsc_wqe_data_seg *recv_head;
	struct xsc_wqe_data_seg *data_seg;
	unsigned long flags;
	int err = 0;
	u16 next_pid = 0;
	union xsc_db_data db;
	int nreq;
	u16 idx;
	int i;

	if (!is_support_rdma(xdev)) {
		xsc_ib_dbg(dev, "rdma unsupported,%s no action.\n", __func__);
		return 0;
	}

	spin_lock_irqsave(&qp->rq.lock, flags);

	idx = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(xsc_wq_overflow(&qp->rq, nreq,
					     to_xcq(qp->ibqp.recv_cq)))) {
			xsc_ib_err(dev, "recv work queue overflow\n");
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
			xsc_ib_err(dev, "max gs exceeded %d (max = %d)\n",
				   wr->num_sge, qp->rq.max_gs);
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		recv_head = get_recv_wqe(qp, idx);

		for (i = 0; i < wr->num_sge; ++i) {
			if (unlikely(!wr->sg_list[i].length))
				continue;
			data_seg = get_seg_wqe(recv_head, i);
			data_seg->in_line = 0;
			WR_LE_64(data_seg->va, wr->sg_list[i].addr);
			WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey);
			if (is_qp1(qp->xqp.qp_type))
				WR_LE_32(data_seg->seg_len, xdev->caps.rx_pkt_len_max);
			else
				WR_LE_32(data_seg->seg_len, wr->sg_list[i].length);
		}

		qp->rq.wrid[idx] = wr->wr_id;

		idx = (idx + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT);
		db.rq_next_pid = next_pid;
		db.rqn = qp->doorbell_qpn;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		writel(db.raw_data, REG_ADDR(xdev, xdev->regs.rx_db));
	}

	spin_unlock_irqrestore(&qp->rq.lock, flags);

	return err;
}

static inline enum ib_qp_state to_ib_qp_state(enum xsc_qp_state xsc_state)
{
	switch (xsc_state) {
	case XSC_QP_STATE_RST:		return IB_QPS_RESET;
	case XSC_QP_STATE_INIT:		return IB_QPS_INIT;
	case XSC_QP_STATE_RTR:		return IB_QPS_RTR;
	case XSC_QP_STATE_RTS:		return IB_QPS_RTS;
	case XSC_QP_STATE_SQ_DRAINING:
	case XSC_QP_STATE_SQD:		return IB_QPS_SQD;
	case XSC_QP_STATE_SQER:		return IB_QPS_SQE;
	case XSC_QP_STATE_ERR:		return IB_QPS_ERR;
	default:			return -1;
	}
}

static inline enum ib_mig_state to_ib_mig_state(int xsc_mig_state)
{
	switch (xsc_mig_state) {
	case XSC_QP_PM_ARMED:		return IB_MIG_ARMED;
	case XSC_QP_PM_REARM:		return IB_MIG_REARM;
	case XSC_QP_PM_MIGRATED:	return IB_MIG_MIGRATED;
	default:			return -1;
	}
}

int xsc_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
		    struct ib_qp_init_attr *qp_init_attr)
{
	struct xsc_ib_dev *dev = to_mdev(ibqp->device);
	struct xsc_ib_qp *qp = to_xqp(ibqp);
	struct xsc_query_qp_mbox_out *outb;
	struct xsc_qp_context *context;
	int xsc_state;
	int err = 0;

	if (!is_support_rdma(dev->xdev)) {
		xsc_ib_dbg(dev, "rdma unsupported,%s no action.\n", __func__);
		return 0;
	}

	mutex_lock(&qp->mutex);
	outb = kzalloc(sizeof(*outb), GFP_KERNEL);
	if (!outb) {
		err = -ENOMEM;
		goto out;
	}

	context = &outb->ctx;

	err = xsc_core_qp_query(dev->xdev, &qp->xqp, outb, sizeof(*outb));
	if (err)
		goto out_free;

	xsc_state = context->state;

	qp->state = to_ib_qp_state(xsc_state);
	qp_attr->qp_state = qp->state;
	qp_attr->path_mtu = context->mtu_mode ? IB_MTU_4096 : IB_MTU_1024;
	qp_attr->rq_psn = be32_to_cpu(context->next_recv_psn) & 0xffffff;
	qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
	qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff;
	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
	qp_attr->sq_draining = xsc_state == XSC_QP_STATE_SQ_DRAINING;
	qp_attr->retry_cnt = context->retry_cnt;
	qp_attr->rnr_retry = context->rnr_retry;
	qp_attr->cur_qp_state = qp_attr->qp_state;
	qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
	qp_attr->cap.max_recv_sge = qp->rq.max_gs;

	/* the same SQ limits are reported for kernel and user QPs */
	qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
	qp_attr->cap.max_send_sge = qp->sq.max_gs;

	/* We don't support inline sends for kernel QPs (yet), and we
	 * don't know what userspace's value should be.
	 */
	qp_attr->cap.max_inline_data = 0;

	qp_init_attr->cap = qp_attr->cap;

	qp_init_attr->create_flags = 0;
	if (qp->flags & XSC_IB_QP_BLOCK_MULTICAST_LOOPBACK)
		qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;

	qp_init_attr->sq_sig_type = qp->sq_signal_bits & XSC_WQE_CTRL_CQ_UPDATE ?
				    IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;

out_free:
	kfree(outb);

out:
	mutex_unlock(&qp->mutex);
	return err;
}