// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2022 Hisilicon Limited. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "hns_roce_device.h"
#include "hns_roce_dca.h"

#define UVERBS_MODULE_NAME hns_ib
#include <rdma/uverbs_named_ioctl.h>

/* DCA mem ageing interval time */
#define DCA_MEM_AGEING_MSES 1000

/* DCA memory */
struct dca_mem {
#define DCA_MEM_FLAGS_ALLOCED BIT(0)
#define DCA_MEM_FLAGS_REGISTERED BIT(1)
	u32 flags;
	struct list_head list; /* link to mem list in dca context */
	spinlock_t lock; /* protect the @flags and @list */
	int page_count; /* page count in this mem obj */
	u64 key; /* key registered by the caller */
	u32 size; /* bytes in this mem object */
	struct hns_dca_page_state *states; /* record each page's state */
	void *pages; /* memory handle for getting dma address */
};

struct dca_mem_attr {
	u64 key;
	u64 addr;
	u32 size;
};

static inline void set_dca_page_to_free(struct hns_dca_page_state *state)
{
	state->buf_id = HNS_DCA_INVALID_BUF_ID;
	state->active = 0;
	state->lock = 0;
}

static inline void set_dca_page_to_inactive(struct hns_dca_page_state *state)
{
	state->active = 0;
	state->lock = 0;
}

static inline void lock_dca_page_to_attach(struct hns_dca_page_state *state,
					   u32 buf_id)
{
	state->buf_id = HNS_DCA_ID_MASK & buf_id;
	state->active = 0;
	state->lock = 1;
}

static inline void unlock_dca_page_to_active(struct hns_dca_page_state *state,
					     u32 buf_id)
{
	state->buf_id = HNS_DCA_ID_MASK & buf_id;
	state->active = 1;
	state->lock = 0;
}

static inline bool dca_page_is_free(struct hns_dca_page_state *state)
{
	return state->buf_id == HNS_DCA_INVALID_BUF_ID;
}

static inline bool dca_page_is_attached(struct hns_dca_page_state *state,
					u32 buf_id)
{
	/* only the own bit needs to be matched. */
	return (HNS_DCA_OWN_MASK & buf_id) ==
		(HNS_DCA_OWN_MASK & state->buf_id);
}

static inline bool dca_page_is_active(struct hns_dca_page_state *state,
				      u32 buf_id)
{
	/* all buf id bits must be matched */
	return (HNS_DCA_ID_MASK & buf_id) == state->buf_id &&
		!state->lock && state->active;
}

static inline bool dca_page_is_allocated(struct hns_dca_page_state *state,
					 u32 buf_id)
{
	return dca_page_is_attached(state, buf_id) && state->lock;
}

static inline bool dca_page_is_inactive(struct hns_dca_page_state *state)
{
	return !state->lock && !state->active;
}

static inline bool dca_mem_is_available(struct dca_mem *mem)
{
	return mem->flags == (DCA_MEM_FLAGS_ALLOCED | DCA_MEM_FLAGS_REGISTERED);
}

static void free_dca_pages(struct hns_roce_dev *hr_dev, bool is_user,
			   void *pages)
{
	if (is_user)
		ib_umem_release(pages);
	else
		hns_roce_buf_free(hr_dev, pages);
}

static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, bool is_user,
			     struct dca_mem *mem, struct dca_mem_attr *attr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf *kmem;

	if (is_user) {
		struct ib_umem *umem;

		umem = ib_umem_get(ibdev, attr->addr, attr->size, 0);
		if (IS_ERR(umem)) {
			ibdev_err(ibdev, "failed to get uDCA pages, ret = %ld.\n",
				  PTR_ERR(umem));
			return NULL;
		}

		mem->page_count = ib_umem_num_dma_blocks(umem,
							 HNS_HW_PAGE_SIZE);
		return umem;
	}

	kmem = hns_roce_buf_alloc(hr_dev, attr->size, HNS_HW_PAGE_SHIFT,
				  HNS_ROCE_BUF_NOSLEEP | HNS_ROCE_BUF_NOFAIL);
	if (IS_ERR(kmem)) {
		ibdev_err(ibdev, "failed to alloc kDCA pages, ret = %ld.\n",
			  PTR_ERR(kmem));
		return NULL;
	}

	mem->page_count = kmem->npages;
	/* Overwrite attr->size with the size actually allocated */
	attr->size = kmem->ntrunks << kmem->trunk_shift;
	return kmem;
}

static void init_dca_kmem_states(struct hns_roce_dev *hr_dev,
				 struct hns_dca_page_state *states, int
count, struct hns_roce_buf *kmem) { dma_addr_t cur_addr; dma_addr_t pre_addr; int i; pre_addr = 0; for (i = 0; i < kmem->npages && i < count; i++) { cur_addr = hns_roce_buf_page(kmem, i); if (cur_addr - pre_addr != HNS_HW_PAGE_SIZE) states[i].head = 1; pre_addr = cur_addr; } } static void init_dca_umem_states(struct hns_roce_dev *hr_dev, struct hns_dca_page_state *states, int count, struct ib_umem *umem) { struct ib_block_iter biter; dma_addr_t cur_addr; dma_addr_t pre_addr; int i = 0; pre_addr = 0; rdma_for_each_block(umem->sgt_append.sgt.sgl, &biter, umem->sgt_append.sgt.nents, HNS_HW_PAGE_SIZE) { cur_addr = rdma_block_iter_dma_address(&biter); if (i < count) { if (cur_addr - pre_addr != HNS_HW_PAGE_SIZE) states[i].head = 1; } pre_addr = cur_addr; i++; } } static struct hns_dca_page_state *alloc_dca_states(struct hns_roce_dev *hr_dev, void *pages, int count, bool is_user) { struct hns_dca_page_state *states; states = kcalloc(count, sizeof(*states), GFP_KERNEL); if (!states) return NULL; if (is_user) init_dca_umem_states(hr_dev, states, count, pages); else init_dca_kmem_states(hr_dev, states, count, pages); return states; } #define DCA_MEM_STOP_ITERATE -1 #define DCA_MEM_NEXT_ITERATE -2 static void travel_dca_pages(struct hns_roce_dca_ctx *ctx, void *param, int (*cb)(struct dca_mem *, int, void *)) { struct dca_mem *mem, *tmp; unsigned long flags; bool avail; int ret; int i; spin_lock_irqsave(&ctx->pool_lock, flags); list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { spin_unlock_irqrestore(&ctx->pool_lock, flags); spin_lock(&mem->lock); avail = dca_mem_is_available(mem); ret = 0; for (i = 0; avail && i < mem->page_count; i++) { ret = cb(mem, i, param); if (ret == DCA_MEM_STOP_ITERATE || ret == DCA_MEM_NEXT_ITERATE) break; } spin_unlock(&mem->lock); spin_lock_irqsave(&ctx->pool_lock, flags); if (ret == DCA_MEM_STOP_ITERATE) goto done; } done: spin_unlock_irqrestore(&ctx->pool_lock, flags); } struct dca_get_alloced_pages_attr { u32 buf_id; dma_addr_t *pages; u32 total; u32 max; }; static int get_alloced_kmem_proc(struct dca_mem *mem, int index, void *param) { struct dca_get_alloced_pages_attr *attr = param; struct hns_dca_page_state *states = mem->states; struct hns_roce_buf *kmem = mem->pages; u32 i; for (i = 0; i < kmem->npages; i++) { if (dca_page_is_allocated(&states[i], attr->buf_id)) { attr->pages[attr->total++] = hns_roce_buf_page(kmem, i); if (attr->total >= attr->max) return DCA_MEM_STOP_ITERATE; } } return DCA_MEM_NEXT_ITERATE; } static int get_alloced_umem_proc(struct dca_mem *mem, int index, void *param) { struct dca_get_alloced_pages_attr *attr = param; struct hns_dca_page_state *states = mem->states; struct ib_umem *umem = mem->pages; struct ib_block_iter biter; u32 i = 0; rdma_for_each_block(umem->sgt_append.sgt.sgl, &biter, umem->sgt_append.sgt.nents, HNS_HW_PAGE_SIZE) { if (dca_page_is_allocated(&states[i], attr->buf_id)) { attr->pages[attr->total++] = rdma_block_iter_dma_address(&biter); if (attr->total >= attr->max) return DCA_MEM_STOP_ITERATE; } i++; } return DCA_MEM_NEXT_ITERATE; } /* user DCA is managed by ucontext, kernel DCA is managed by device */ static inline struct hns_roce_dca_ctx * to_hr_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx) { return uctx ? 
&uctx->dca_ctx : &hr_dev->dca_ctx; } static inline struct hns_roce_dca_ctx * hr_qp_to_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_ucontext *uctx = NULL; if (hr_qp->ibqp.pd->uobject) uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); return to_hr_dca_ctx(hr_dev, uctx); } static int config_dca_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, dma_addr_t *pages, int page_count) { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mtr *mtr = &hr_qp->mtr; int ret; ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); if (ret) { ibdev_err(ibdev, "failed to map DCA pages, ret = %d.\n", ret); return ret; } if (hr_dev->hw->set_dca_buf) { ret = hr_dev->hw->set_dca_buf(hr_dev, hr_qp); if (ret) { ibdev_err(ibdev, "failed to set DCA to HW, ret = %d.\n", ret); return ret; } } return 0; } static int setup_dca_buf_to_hw(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) { struct dca_get_alloced_pages_attr attr = {}; dma_addr_t *pages; int ret; /* alloc a tmp array to store buffer's dma address */ pages = kcalloc(count, sizeof(dma_addr_t), GFP_ATOMIC); if (!pages) return -ENOMEM; attr.buf_id = buf_id; attr.pages = pages; attr.max = count; if (hr_qp->ibqp.uobject) travel_dca_pages(ctx, &attr, get_alloced_umem_proc); else travel_dca_pages(ctx, &attr, get_alloced_kmem_proc); if (attr.total != count) { ibdev_err(&hr_dev->ib_dev, "failed to get DCA page %u != %u.\n", attr.total, count); ret = -ENOMEM; goto err_get_pages; } ret = config_dca_qpc(hr_dev, hr_qp, pages, count); err_get_pages: /* drop tmp array */ kfree(pages); return ret; } static void unregister_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx, struct dca_mem *mem) { struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); bool is_user = !!uctx; unsigned long flags; void *states, *pages; spin_lock_irqsave(&ctx->pool_lock, flags); spin_lock(&mem->lock); mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; mem->page_count = 0; pages = mem->pages; mem->pages = NULL; states = mem->states; mem->states = NULL; spin_unlock(&mem->lock); ctx->free_mems--; ctx->free_size -= mem->size; ctx->total_size -= mem->size; spin_unlock_irqrestore(&ctx->pool_lock, flags); kfree(states); free_dca_pages(hr_dev, is_user, pages); } static int register_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx, struct dca_mem *mem, struct dca_mem_attr *attr) { struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); bool is_user = !!uctx; void *states, *pages; unsigned long flags; pages = alloc_dca_pages(hr_dev, is_user, mem, attr); if (!pages) return -ENOMEM; states = alloc_dca_states(hr_dev, pages, mem->page_count, is_user); if (!states) { free_dca_pages(hr_dev, is_user, pages); return -ENOMEM; } spin_lock_irqsave(&ctx->pool_lock, flags); spin_lock(&mem->lock); mem->pages = pages; mem->states = states; mem->key = attr->key; mem->size = attr->size; mem->flags |= DCA_MEM_FLAGS_REGISTERED; spin_unlock(&mem->lock); ctx->free_mems++; ctx->free_size += attr->size; ctx->total_size += attr->size; spin_unlock_irqrestore(&ctx->pool_lock, flags); return 0; } struct dca_page_clear_attr { u32 buf_id; u32 max_pages; u32 clear_pages; }; static int clear_dca_pages_proc(struct dca_mem *mem, int index, void *param) { struct hns_dca_page_state *state = &mem->states[index]; struct dca_page_clear_attr *attr = param; if (dca_page_is_attached(state, attr->buf_id)) { set_dca_page_to_free(state); attr->clear_pages++; } if (attr->clear_pages >= 
attr->max_pages) return DCA_MEM_STOP_ITERATE; else return 0; } static void clear_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) { struct dca_page_clear_attr attr = {}; attr.buf_id = buf_id; attr.max_pages = count; travel_dca_pages(ctx, &attr, clear_dca_pages_proc); } struct dca_page_assign_attr { u32 buf_id; int unit; int total; int max; }; static bool dca_page_is_allocable(struct hns_dca_page_state *state, bool head) { bool is_free = dca_page_is_free(state) || dca_page_is_inactive(state); return head ? is_free : is_free && !state->head; } static int assign_dca_pages_proc(struct dca_mem *mem, int index, void *param) { struct dca_page_assign_attr *attr = param; struct hns_dca_page_state *state; int checked_pages = 0; int start_index = 0; int free_pages = 0; int i; /* Check the continuous pages count is not smaller than unit count */ for (i = index; free_pages < attr->unit && i < mem->page_count; i++) { checked_pages++; state = &mem->states[i]; if (dca_page_is_allocable(state, free_pages == 0)) { if (free_pages == 0) start_index = i; free_pages++; } else { free_pages = 0; } } if (free_pages < attr->unit) return DCA_MEM_NEXT_ITERATE; for (i = 0; i < free_pages; i++) { state = &mem->states[start_index + i]; lock_dca_page_to_attach(state, attr->buf_id); attr->total++; } if (attr->total >= attr->max) return DCA_MEM_STOP_ITERATE; return checked_pages; } static u32 assign_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count, u32 unit) { struct dca_page_assign_attr attr = {}; attr.buf_id = buf_id; attr.unit = unit; attr.max = count; travel_dca_pages(ctx, &attr, assign_dca_pages_proc); return attr.total; } struct dca_page_active_attr { u32 buf_id; u32 max_pages; u32 alloc_pages; u32 dirty_mems; }; static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) { struct dca_page_active_attr *attr = param; struct hns_dca_page_state *state; bool changed = false; bool stop = false; int i, free_pages; free_pages = 0; for (i = 0; !stop && i < mem->page_count; i++) { state = &mem->states[i]; if (dca_page_is_free(state)) { free_pages++; } else if (dca_page_is_allocated(state, attr->buf_id)) { free_pages++; /* Change matched pages state */ unlock_dca_page_to_active(state, attr->buf_id); changed = true; attr->alloc_pages++; if (attr->alloc_pages == attr->max_pages) stop = true; } } for (; changed && i < mem->page_count; i++) if (dca_page_is_free(state)) free_pages++; /* Clean mem changed to dirty */ if (changed && free_pages == mem->page_count) attr->dirty_mems++; return stop ? 
DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; } static u32 active_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) { struct dca_page_active_attr attr = {}; unsigned long flags; attr.buf_id = buf_id; attr.max_pages = count; travel_dca_pages(ctx, &attr, active_dca_pages_proc); /* Update free size */ spin_lock_irqsave(&ctx->pool_lock, flags); ctx->free_mems -= attr.dirty_mems; ctx->free_size -= attr.alloc_pages << HNS_HW_PAGE_SHIFT; spin_unlock_irqrestore(&ctx->pool_lock, flags); return attr.alloc_pages; } struct dca_page_query_active_attr { u32 buf_id; u32 curr_index; u32 start_index; u32 page_index; u32 page_count; u64 mem_key; }; static int sync_dca_buf_offset(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr) { struct ib_device *ibdev = &hr_dev->ib_dev; if (hr_qp->sq.wqe_cnt > 0) { if (attr->sq_offset >= hr_qp->sge.offset) { ibdev_err(ibdev, "failed to check SQ offset = %u\n", attr->sq_offset); return -EINVAL; } hr_qp->sq.wqe_offset = hr_qp->sq.offset + attr->sq_offset; } if (hr_qp->sge.sge_cnt > 0) { if (attr->sge_offset >= hr_qp->rq.offset) { ibdev_err(ibdev, "failed to check exSGE offset = %u\n", attr->sge_offset); return -EINVAL; } hr_qp->sge.wqe_offset = hr_qp->sge.offset + attr->sge_offset; } if (hr_qp->rq.wqe_cnt > 0) { if (attr->rq_offset >= hr_qp->buff_size) { ibdev_err(ibdev, "failed to check RQ offset = %u\n", attr->rq_offset); return -EINVAL; } hr_qp->rq.wqe_offset = hr_qp->rq.offset + attr->rq_offset; } return 0; } static u32 alloc_buf_from_dca_mem(struct hns_roce_qp *hr_qp, struct hns_roce_dca_ctx *ctx) { u32 buf_pages, unit_pages, alloc_pages; u32 buf_id; buf_pages = hr_qp->dca_cfg.npages; /* Gen new buf id */ buf_id = HNS_DCA_TO_BUF_ID(hr_qp->qpn, hr_qp->dca_cfg.attach_count); /* Assign pages from free pages */ unit_pages = hr_qp->mtr.hem_cfg.is_direct ? 
buf_pages : 1; alloc_pages = assign_dca_pages(ctx, buf_id, buf_pages, unit_pages); if (buf_pages != alloc_pages) { if (alloc_pages > 0) clear_dca_pages(ctx, buf_id, alloc_pages); return HNS_DCA_INVALID_BUF_ID; } return buf_id; } static int active_alloced_buf(struct hns_roce_qp *hr_qp, struct hns_roce_dca_ctx *ctx, struct hns_dca_attach_attr *attr, u32 buf_id) { struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); struct ib_device *ibdev = &hr_dev->ib_dev; u32 active_pages, alloc_pages; int ret; alloc_pages = hr_qp->dca_cfg.npages; ret = sync_dca_buf_offset(hr_dev, hr_qp, attr); if (ret) { ibdev_err(ibdev, "failed to sync DCA offset, ret = %d\n", ret); goto active_fail; } ret = setup_dca_buf_to_hw(hr_dev, hr_qp, ctx, buf_id, alloc_pages); if (ret) { ibdev_err(ibdev, "failed to setup DCA buf, ret = %d.\n", ret); goto active_fail; } active_pages = active_dca_pages(ctx, buf_id, alloc_pages); if (active_pages != alloc_pages) { ibdev_err(ibdev, "failed to active DCA pages, %u != %u.\n", active_pages, alloc_pages); ret = -ENOBUFS; goto active_fail; } return 0; active_fail: clear_dca_pages(ctx, buf_id, alloc_pages); return ret; } #define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS) #define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n) static bool start_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan) { unsigned long *st = ctx->sync_status; if (st && dcan < ctx->max_qps) return !test_and_set_bit_lock(DCAN_TO_SYNC_BIT(dcan), st); return true; } static void stop_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan) { unsigned long *st = ctx->sync_status; if (st && dcan < ctx->max_qps) clear_bit_unlock(DCAN_TO_SYNC_BIT(dcan), st); } static void update_dca_buf_status(struct hns_roce_dca_ctx *ctx, u32 dcan, bool en) { unsigned long *st = ctx->buf_status; if (st && dcan < ctx->max_qps) { if (en) set_bit(DCAN_TO_STAT_BIT(dcan), st); else clear_bit(DCAN_TO_STAT_BIT(dcan), st); /* sync status with user-space rdma */ smp_mb__after_atomic(); } } static void restart_aging_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_dca_ctx *ctx) { spin_lock(&ctx->aging_lock); ctx->exit_aging = false; if (!list_empty(&ctx->aging_new_list)) queue_delayed_work(hr_dev->irq_workq, &ctx->aging_dwork, msecs_to_jiffies(DCA_MEM_AGEING_MSES)); spin_unlock(&ctx->aging_lock); } static void stop_aging_dca_mem(struct hns_roce_dca_ctx *ctx, struct hns_roce_dca_cfg *cfg, bool stop_worker) { spin_lock(&ctx->aging_lock); if (stop_worker) { ctx->exit_aging = true; cancel_delayed_work(&ctx->aging_dwork); } spin_lock(&cfg->lock); if (!list_empty(&cfg->aging_node)) list_del_init(&cfg->aging_node); spin_unlock(&cfg->lock); spin_unlock(&ctx->aging_lock); } static int attach_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr, struct hns_dca_attach_resp *resp) { struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; u32 buf_id; int ret; if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH) stop_aging_dca_mem(ctx, cfg, false); resp->alloc_flags = 0; spin_lock(&cfg->lock); buf_id = cfg->buf_id; /* Already attached */ if (buf_id != HNS_DCA_INVALID_BUF_ID) { resp->alloc_pages = cfg->npages; spin_unlock(&cfg->lock); return 0; } /* Start to new attach */ resp->alloc_pages = 0; buf_id = alloc_buf_from_dca_mem(hr_qp, ctx); if (buf_id == HNS_DCA_INVALID_BUF_ID) { spin_unlock(&cfg->lock); /* No report fail, need try again after the pool increased */ return 0; } ret = active_alloced_buf(hr_qp, ctx, attr, buf_id); if (ret) { 
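		/*
		 * active_alloced_buf() has already rolled the page assignment
		 * back via clear_dca_pages() on its failure path, so only the
		 * QP's DCA config lock needs to be dropped before reporting.
		 */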
spin_unlock(&cfg->lock); ibdev_err(&hr_dev->ib_dev, "failed to active DCA buf for QP-%lu, ret = %d.\n", hr_qp->qpn, ret); return ret; } /* Attach ok */ cfg->buf_id = buf_id; cfg->attach_count++; spin_unlock(&cfg->lock); resp->alloc_flags |= HNS_DCA_ATTACH_FLAGS_NEW_BUFFER; resp->alloc_pages = cfg->npages; update_dca_buf_status(ctx, cfg->dcan, true); return 0; } struct dca_page_free_buf_attr { u32 buf_id; u32 max_pages; u32 free_pages; u32 clean_mems; }; static int free_buffer_pages_proc(struct dca_mem *mem, int index, void *param) { struct dca_page_free_buf_attr *attr = param; struct hns_dca_page_state *state; bool changed = false; bool stop = false; int i, free_pages; free_pages = 0; for (i = 0; !stop && i < mem->page_count; i++) { state = &mem->states[i]; /* Change matched pages state */ if (dca_page_is_attached(state, attr->buf_id)) { set_dca_page_to_free(state); changed = true; attr->free_pages++; if (attr->free_pages == attr->max_pages) stop = true; } if (dca_page_is_free(state)) free_pages++; } for (; changed && i < mem->page_count; i++) if (dca_page_is_free(state)) free_pages++; if (changed && free_pages == mem->page_count) attr->clean_mems++; return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; } static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, struct hns_roce_dca_cfg *cfg) { struct dca_page_free_buf_attr attr = {}; unsigned long flags; u32 buf_id; update_dca_buf_status(ctx, cfg->dcan, false); spin_lock(&cfg->lock); buf_id = cfg->buf_id; cfg->buf_id = HNS_DCA_INVALID_BUF_ID; spin_unlock(&cfg->lock); if (buf_id == HNS_DCA_INVALID_BUF_ID) return; attr.buf_id = buf_id; attr.max_pages = cfg->npages; travel_dca_pages(ctx, &attr, free_buffer_pages_proc); /* Update free size */ spin_lock_irqsave(&ctx->pool_lock, flags); ctx->free_mems += attr.clean_mems; ctx->free_size += attr.free_pages << HNS_HW_PAGE_SHIFT; spin_unlock_irqrestore(&ctx->pool_lock, flags); } void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_detach_attr *attr) { struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; stop_aging_dca_mem(ctx, cfg, true); spin_lock(&ctx->aging_lock); spin_lock(&cfg->lock); cfg->sq_idx = attr->sq_idx; list_add_tail(&cfg->aging_node, &ctx->aging_new_list); spin_unlock(&cfg->lock); spin_unlock(&ctx->aging_lock); restart_aging_dca_mem(hr_dev, ctx); } struct dca_mem_shrink_attr { u64 shrink_key; u32 shrink_mems; }; static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param) { struct dca_mem_shrink_attr *attr = param; struct hns_dca_page_state *state; int i, free_pages; free_pages = 0; for (i = 0; i < mem->page_count; i++) { state = &mem->states[i]; if (dca_page_is_free(state)) free_pages++; } /* No any page be used */ if (free_pages == mem->page_count) { /* unregister first empty DCA mem */ if (!attr->shrink_mems) { mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; attr->shrink_key = mem->key; } attr->shrink_mems++; } if (attr->shrink_mems > 1) return DCA_MEM_STOP_ITERATE; else return DCA_MEM_NEXT_ITERATE; } struct hns_dca_shrink_resp { u64 free_key; u32 free_mems; }; static void shrink_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx, u64 reserved_size, struct hns_dca_shrink_resp *resp) { struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); struct dca_mem_shrink_attr attr = {}; unsigned long flags; bool need_shink; spin_lock_irqsave(&ctx->pool_lock, flags); need_shink = ctx->free_mems > 0 && ctx->free_size > reserved_size; 
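	/*
	 * Only shrink when at least one registered mem has no active pages
	 * and the pool's free size still exceeds the size the caller wants
	 * kept reserved; shrink_dca_page_proc() then unregisters the first
	 * completely free mem and reports its key to the caller.
	 */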
spin_unlock_irqrestore(&ctx->pool_lock, flags); if (!need_shink) return; travel_dca_pages(ctx, &attr, shrink_dca_page_proc); resp->free_mems = attr.shrink_mems; resp->free_key = attr.shrink_key; } static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_dca_ctx *ctx) { struct hns_roce_dca_cfg *cfg, *tmp_cfg; struct hns_roce_qp *hr_qp; spin_lock(&ctx->aging_lock); list_for_each_entry_safe(cfg, tmp_cfg, &ctx->aging_new_list, aging_node) list_move(&cfg->aging_node, &ctx->aging_proc_list); while (!ctx->exit_aging && !list_empty(&ctx->aging_proc_list)) { cfg = list_first_entry(&ctx->aging_proc_list, struct hns_roce_dca_cfg, aging_node); list_del_init_careful(&cfg->aging_node); hr_qp = container_of(cfg, struct hns_roce_qp, dca_cfg); spin_unlock(&ctx->aging_lock); if (start_free_dca_buf(ctx, cfg->dcan)) { if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) free_buf_from_dca_mem(ctx, cfg); stop_free_dca_buf(ctx, cfg->dcan); } spin_lock(&ctx->aging_lock); spin_lock(&cfg->lock); if (cfg->buf_id != HNS_DCA_INVALID_BUF_ID) list_move(&cfg->aging_node, &ctx->aging_new_list); spin_unlock(&cfg->lock); } spin_unlock(&ctx->aging_lock); } static void udca_mem_aging_work(struct work_struct *work) { struct hns_roce_dca_ctx *ctx = container_of(work, struct hns_roce_dca_ctx, aging_dwork.work); struct hns_roce_ucontext *uctx = container_of(ctx, struct hns_roce_ucontext, dca_ctx); struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); cancel_delayed_work(&ctx->aging_dwork); process_aging_dca_mem(hr_dev, ctx); if (!ctx->exit_aging) restart_aging_dca_mem(hr_dev, ctx); } static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev); static void kdca_mem_aging_work(struct work_struct *work) { struct hns_roce_dca_ctx *ctx = container_of(work, struct hns_roce_dca_ctx, aging_dwork.work); struct hns_roce_dev *hr_dev = container_of(ctx, struct hns_roce_dev, dca_ctx); cancel_delayed_work(&ctx->aging_dwork); process_aging_dca_mem(hr_dev, ctx); remove_unused_dca_mem(hr_dev); if (!ctx->exit_aging) restart_aging_dca_mem(hr_dev, ctx); } static void init_dca_context(struct hns_roce_dca_ctx *ctx, bool is_user) { INIT_LIST_HEAD(&ctx->pool); spin_lock_init(&ctx->pool_lock); ctx->total_size = 0; ida_init(&ctx->ida); INIT_LIST_HEAD(&ctx->aging_new_list); INIT_LIST_HEAD(&ctx->aging_proc_list); spin_lock_init(&ctx->aging_lock); ctx->exit_aging = false; if (is_user) INIT_DELAYED_WORK(&ctx->aging_dwork, udca_mem_aging_work); else INIT_DELAYED_WORK(&ctx->aging_dwork, kdca_mem_aging_work); } static void cleanup_dca_context(struct hns_roce_dev *hr_dev, struct hns_roce_dca_ctx *ctx) { struct dca_mem *mem, *tmp; unsigned long flags; bool is_user; cancel_delayed_work_sync(&ctx->aging_dwork); is_user = (ctx != &hr_dev->dca_ctx); spin_lock_irqsave(&ctx->pool_lock, flags); list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { list_del(&mem->list); spin_lock(&mem->lock); mem->flags = 0; spin_unlock(&mem->lock); spin_unlock_irqrestore(&ctx->pool_lock, flags); kfree(mem->states); free_dca_pages(hr_dev, is_user, mem->pages); kfree(mem); spin_lock_irqsave(&ctx->pool_lock, flags); } ctx->total_size = 0; spin_unlock_irqrestore(&ctx->pool_lock, flags); } #define DCA_MAX_MEM_SIZE ~0UL static uint dca_unit_size; static ulong dca_min_size = DCA_MAX_MEM_SIZE; static ulong dca_max_size = DCA_MAX_MEM_SIZE; static void load_kdca_param(struct hns_roce_dca_ctx *ctx) { unsigned int unit_size; unit_size = ALIGN(dca_unit_size, PAGE_SIZE); ctx->unit_size = unit_size; if (!unit_size) return; if (dca_max_size == DCA_MAX_MEM_SIZE || 
dca_max_size == 0) ctx->max_size = DCA_MAX_MEM_SIZE; else ctx->max_size = roundup(dca_max_size, unit_size); if (dca_min_size == DCA_MAX_MEM_SIZE) ctx->min_size = ctx->max_size; else ctx->min_size = roundup(dca_min_size, unit_size); } void hns_roce_init_dca(struct hns_roce_dev *hr_dev) { load_kdca_param(&hr_dev->dca_ctx); init_dca_context(&hr_dev->dca_ctx, false); } void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev) { cleanup_dca_context(hr_dev, &hr_dev->dca_ctx); } static void init_udca_status(struct hns_roce_ucontext *uctx, int udca_max_qps, unsigned int dev_max_qps) { struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); struct ib_ucontext *ib_uctx = &uctx->ibucontext; void *kaddr; size_t size; size = BITS_TO_BYTES(udca_max_qps * bits_per_qp); ctx->status_npage = DIV_ROUND_UP(size, PAGE_SIZE); size = ctx->status_npage * PAGE_SIZE; ctx->max_qps = min_t(unsigned int, dev_max_qps, size * BITS_PER_BYTE / bits_per_qp); kaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); if (!kaddr) return; ctx->dca_mmap_entry = hns_roce_user_mmap_entry_insert(ib_uctx, (u64)kaddr, size, HNS_ROCE_MMAP_TYPE_DCA); if (!ctx->dca_mmap_entry) { free_pages_exact(kaddr, size); return; } ctx->buf_status = (unsigned long *)kaddr; ctx->sync_status = (unsigned long *)(kaddr + size / 2); } void hns_roce_register_udca(struct hns_roce_dev *hr_dev, int max_qps, struct hns_roce_ucontext *uctx) { struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) return; init_dca_context(ctx, true); if (max_qps > 0) init_udca_status(uctx, max_qps, hr_dev->caps.num_qps); } void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx) { struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) return; cleanup_dca_context(hr_dev, ctx); if (ctx->buf_status) { free_pages_exact(ctx->buf_status, ctx->status_npage * PAGE_SIZE); ctx->buf_status = NULL; } ida_destroy(&ctx->ida); } static struct dca_mem *key_to_dca_mem(struct list_head *head, u64 key) { struct dca_mem *mem; list_for_each_entry(mem, head, list) if (mem->key == key) return mem; return NULL; } static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, u32 alloc_size) { unsigned long flags; bool enable; spin_lock_irqsave(&ctx->pool_lock, flags); /* Pool size no limit */ if (ctx->max_size == DCA_MAX_MEM_SIZE) enable = true; else /* Pool size not exceed max size */ enable = (ctx->total_size + alloc_size) < ctx->max_size; spin_unlock_irqrestore(&ctx->pool_lock, flags); return enable; } static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) { unsigned long flags; bool enable; spin_lock_irqsave(&ctx->pool_lock, flags); enable = ctx->total_size > 0 && ctx->min_size < ctx->max_size; spin_unlock_irqrestore(&ctx->pool_lock, flags); return enable; } static struct dca_mem *alloc_dca_mem(struct hns_roce_dca_ctx *ctx) { struct dca_mem *mem, *tmp, *found = NULL; unsigned long flags; spin_lock_irqsave(&ctx->pool_lock, flags); list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { spin_lock(&mem->lock); if (!mem->flags) { found = mem; mem->flags |= DCA_MEM_FLAGS_ALLOCED; spin_unlock(&mem->lock); break; } spin_unlock(&mem->lock); } spin_unlock_irqrestore(&ctx->pool_lock, flags); if (found) return found; mem = kzalloc(sizeof(*mem), GFP_ATOMIC); if (!mem) return NULL; spin_lock_init(&mem->lock); INIT_LIST_HEAD(&mem->list); 
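	/*
	 * Mark the new mem as allocated before linking it into the pool so a
	 * concurrent travel_dca_pages() skips it: dca_mem_is_available() only
	 * accepts mems that are both ALLOCED and REGISTERED.
	 */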
mem->flags |= DCA_MEM_FLAGS_ALLOCED; spin_lock_irqsave(&ctx->pool_lock, flags); list_add(&mem->list, &ctx->pool); spin_unlock_irqrestore(&ctx->pool_lock, flags); return mem; } static void free_dca_mem(struct dca_mem *mem) { /* When iterate all DCA mems in travel_dca_pages(), we will NOT hold the * pool's lock and just set the DCA mem as free state during the DCA is * working until cleanup the DCA context in hns_roce_cleanup_dca(). */ spin_lock(&mem->lock); mem->flags = 0; spin_unlock(&mem->lock); } static int add_dca_mem(struct hns_roce_dev *hr_dev, u32 new_size) { struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, NULL); struct dca_mem_attr attr = {}; struct dca_mem *mem = NULL; int ret; if (!add_dca_mem_enabled(ctx, new_size)) return -ENOMEM; /* Add new DCA mem */ mem = alloc_dca_mem(ctx); if (!mem) return -ENOMEM; attr.key = (u64)mem; attr.size = roundup(new_size, ctx->unit_size); ret = register_dca_mem(hr_dev, NULL, mem, &attr); if (ret) { free_dca_mem(mem); ibdev_err(&hr_dev->ib_dev, "failed to register DCA mem, ret = %d.\n", ret); } return ret; } struct dca_page_get_active_buf_attr { u32 buf_id; void **buf_list; u32 total; u32 max; }; static int get_active_kbuf_proc(struct dca_mem *mem, int index, void *param) { struct dca_page_get_active_buf_attr *attr = param; struct hns_dca_page_state *states = mem->states; struct hns_roce_buf *kmem = mem->pages; void *buf; u32 i; for (i = 0; i < kmem->npages; i++) { if (!dca_page_is_active(&states[i], attr->buf_id)) continue; buf = hns_roce_buf_offset(kmem, i << HNS_HW_PAGE_SHIFT); attr->buf_list[attr->total++] = buf; if (attr->total >= attr->max) return DCA_MEM_STOP_ITERATE; } return DCA_MEM_NEXT_ITERATE; } static int setup_dca_buf_list(struct hns_roce_dca_ctx *ctx, struct hns_roce_dca_cfg *cfg) { struct dca_page_get_active_buf_attr attr = {}; attr.buf_id = cfg->buf_id; attr.buf_list = cfg->buf_list; attr.max = cfg->npages; travel_dca_pages(ctx, &attr, get_active_kbuf_proc); return attr.total == attr.max ? 
0 : -ENOMEM; } #define DCA_EXPAND_MEM_TRY_TIMES 3 int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr) { struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; struct hns_dca_attach_resp resp = {}; bool is_new_buf = true; int try_times = 0; int ret; do { resp.alloc_flags = 0; ret = attach_dca_mem(hr_dev, hr_qp, attr, &resp); if (ret) break; if (resp.alloc_pages >= cfg->npages) { is_new_buf = !!(resp.alloc_flags & HNS_DCA_ATTACH_FLAGS_NEW_BUFFER); break; } ret = add_dca_mem(hr_dev, hr_qp->buff_size); if (ret) break; } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES); if (ret || resp.alloc_pages < cfg->npages) { ibdev_err(&hr_dev->ib_dev, "failed to attach buf %u != %u, try %d, ret = %d.\n", cfg->npages, resp.alloc_pages, try_times, ret); return -ENOMEM; } /* DCA config not changed */ if (!is_new_buf && cfg->buf_list[0]) return 0; return setup_dca_buf_list(hr_qp_to_dca_ctx(hr_dev, hr_qp), cfg); } static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev) { struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, NULL); struct hns_dca_shrink_resp resp = {}; struct dca_mem *mem; unsigned long flags; while (shrink_dca_mem_enabled(ctx)) { resp.free_mems = 0; shrink_dca_mem(hr_dev, NULL, ctx->min_size, &resp); if (resp.free_mems < 1) break; spin_lock_irqsave(&ctx->pool_lock, flags); mem = key_to_dca_mem(&ctx->pool, resp.free_key); spin_unlock_irqrestore(&ctx->pool_lock, flags); if (!mem) break; unregister_dca_mem(hr_dev, NULL, mem); free_dca_mem(mem); /* No more free memory */ if (resp.free_mems <= 1) break; } } static void kick_dca_buf(struct hns_roce_dev *hr_dev, struct hns_roce_dca_cfg *cfg, struct hns_roce_dca_ctx *ctx) { stop_aging_dca_mem(ctx, cfg, true); free_buf_from_dca_mem(ctx, cfg); restart_aging_dca_mem(hr_dev, ctx); /* Shrink kenrel DCA mem */ if (ctx == &hr_dev->dca_ctx) remove_unused_dca_mem(hr_dev); } static u32 alloc_dca_num(struct hns_roce_dca_ctx *ctx) { int ret; ret = ida_alloc_max(&ctx->ida, ctx->max_qps - 1, GFP_KERNEL); if (ret < 0) return HNS_DCA_INVALID_DCA_NUM; stop_free_dca_buf(ctx, ret); update_dca_buf_status(ctx, ret, false); return ret; } static void free_dca_num(u32 dcan, struct hns_roce_dca_ctx *ctx) { if (dcan == HNS_DCA_INVALID_DCA_NUM) return; ida_free(&ctx->ida, dcan); } static int setup_kdca(struct hns_roce_dca_cfg *cfg) { if (!cfg->npages) return -EINVAL; cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), GFP_KERNEL); if (!cfg->buf_list) return -ENOMEM; return 0; } static void teardown_kdca(struct hns_roce_dca_cfg *cfg) { kfree(cfg->buf_list); cfg->buf_list = NULL; } int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata) { struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; spin_lock_init(&cfg->lock); INIT_LIST_HEAD(&cfg->aging_node); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT; cfg->dcan = HNS_DCA_INVALID_DCA_NUM; /* Cannot support dynamic detach when rq is not empty */ if (!hr_qp->rq.wqe_cnt) hr_qp->en_flags |= HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH; if (!udata) return setup_kdca(cfg); return 0; } void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata) { struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; kick_dca_buf(hr_dev, cfg, ctx); free_dca_num(cfg->dcan, ctx); cfg->dcan = HNS_DCA_INVALID_DCA_NUM; if 
(!udata) teardown_kdca(&hr_qp->dca_cfg); } void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata) { struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; if (hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_ERR) { kick_dca_buf(hr_dev, cfg, ctx); free_dca_num(cfg->dcan, ctx); cfg->dcan = HNS_DCA_INVALID_DCA_NUM; } else if (hr_qp->state == IB_QPS_RTR) { free_dca_num(cfg->dcan, ctx); cfg->dcan = alloc_dca_num(ctx); } } static inline struct hns_roce_ucontext * uverbs_attr_to_hr_uctx(struct uverbs_attr_bundle *attrs) { return rdma_udata_to_drv_context(&attrs->driver_udata, struct hns_roce_ucontext, ibucontext); } #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)( struct uverbs_attr_bundle *attrs) { struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs); struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, HNS_IB_ATTR_DCA_MEM_REG_HANDLE); struct dca_mem_attr init_attr = {}; struct dca_mem *mem; int ret; ret = uverbs_copy_from(&init_attr.addr, attrs, HNS_IB_ATTR_DCA_MEM_REG_ADDR); if (!ret) ret = uverbs_copy_from(&init_attr.size, attrs, HNS_IB_ATTR_DCA_MEM_REG_LEN); if (!ret) ret = uverbs_copy_from(&init_attr.key, attrs, HNS_IB_ATTR_DCA_MEM_REG_KEY); if (ret) return ret; if (!init_attr.size) return -EINVAL; init_attr.size = hr_hw_page_align(init_attr.size); mem = alloc_dca_mem(to_hr_dca_ctx(hr_dev, uctx)); if (!mem) return -ENOMEM; ret = register_dca_mem(hr_dev, uctx, mem, &init_attr); if (ret) { free_dca_mem(mem); return ret; } uobj->object = mem; return 0; } static int dca_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) { struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs); struct dca_mem *mem; /* One DCA MEM maybe shared by many QPs, so the DCA mem uobject must * be destroyed before all QP uobjects, and we will destroy the DCA * uobjects when cleanup DCA context by calling hns_roce_cleanup_dca(). 
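 * So for RDMA_REMOVE_CLOSE and RDMA_REMOVE_DRIVER_REMOVE the mem is kept
 * here and is released later by cleanup_dca_context() when the DCA context
 * of the ucontext (or of the device) is torn down.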
*/ if (why == RDMA_REMOVE_CLOSE || why == RDMA_REMOVE_DRIVER_REMOVE) return 0; mem = uobject->object; unregister_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx, mem); free_dca_mem(mem); return 0; } DECLARE_UVERBS_NAMED_METHOD( HNS_IB_METHOD_DCA_MEM_REG, UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, HNS_IB_OBJECT_DCA_MEM, UVERBS_ACCESS_NEW, UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_LEN, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_ADDR, UVERBS_ATTR_TYPE(u64), UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_KEY, UVERBS_ATTR_TYPE(u64), UA_MANDATORY)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( HNS_IB_METHOD_DCA_MEM_DEREG, UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, HNS_IB_OBJECT_DCA_MEM, UVERBS_ACCESS_DESTROY, UA_MANDATORY)); static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_SHRINK)( struct uverbs_attr_bundle *attrs) { struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs); struct hns_dca_shrink_resp resp = {}; u64 reserved_size = 0; int ret; ret = uverbs_copy_from(&reserved_size, attrs, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE); if (ret) return ret; shrink_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx, reserved_size, &resp); ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, &resp.free_key, sizeof(resp.free_key)); if (!ret) ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, &resp.free_mems, sizeof(resp.free_mems)); if (ret) return ret; return 0; } DECLARE_UVERBS_NAMED_METHOD( HNS_IB_METHOD_DCA_MEM_SHRINK, UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, HNS_IB_OBJECT_DCA_MEM, UVERBS_ACCESS_WRITE, UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, UVERBS_ATTR_TYPE(u64), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, UVERBS_ATTR_TYPE(u64), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); static inline struct hns_roce_qp * uverbs_attr_to_hr_qp(struct uverbs_attr_bundle *attrs, u16 idx) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, idx); if (IS_ERR(uobj)) return ERR_CAST(uobj); return to_hr_qp(uobj->object); } static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_ATTACH)( struct uverbs_attr_bundle *attrs) { struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE); struct hns_dca_attach_attr attr = {}; struct hns_dca_attach_resp resp = {}; int ret; if (IS_ERR(hr_qp)) return PTR_ERR(hr_qp); ret = uverbs_copy_from(&attr.sq_offset, attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET); if (!ret) ret = uverbs_copy_from(&attr.sge_offset, attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET); if (!ret) ret = uverbs_copy_from(&attr.rq_offset, attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET); if (ret) return ret; ret = attach_dca_mem(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr, &resp); if (ret) return ret; ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, &resp.alloc_flags, sizeof(resp.alloc_flags)); if (!ret) ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, &resp.alloc_pages, sizeof(resp.alloc_pages)); return ret; } DECLARE_UVERBS_NAMED_METHOD( HNS_IB_METHOD_DCA_MEM_ATTACH, UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, UVERBS_OBJECT_QP, UVERBS_ACCESS_WRITE, UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), 
UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_DETACH)( struct uverbs_attr_bundle *attrs) { struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE); struct hns_dca_detach_attr attr = {}; int ret; if (IS_ERR(hr_qp)) return PTR_ERR(hr_qp); ret = uverbs_copy_from(&attr.sq_idx, attrs, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX); if (ret) return ret; hns_roce_dca_detach(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr); return 0; } DECLARE_UVERBS_NAMED_METHOD( HNS_IB_METHOD_DCA_MEM_DETACH, UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, UVERBS_OBJECT_QP, UVERBS_ACCESS_WRITE, UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); static int query_dca_active_pages_proc(struct dca_mem *mem, int index, void *param) { struct hns_dca_page_state *state = &mem->states[index]; struct dca_page_query_active_attr *attr = param; if (!dca_page_is_active(state, attr->buf_id)) return 0; if (attr->curr_index < attr->start_index) { attr->curr_index++; return 0; } else if (attr->curr_index > attr->start_index) { return DCA_MEM_STOP_ITERATE; } /* Search first page in DCA mem */ attr->page_index = index; attr->mem_key = mem->key; /* Search active pages in continuous addresses */ while (index < mem->page_count) { state = &mem->states[index]; if (!dca_page_is_active(state, attr->buf_id)) break; index++; attr->page_count++; } return DCA_MEM_STOP_ITERATE; } static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_QUERY)( struct uverbs_attr_bundle *attrs) { struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE); struct dca_page_query_active_attr active_attr = {}; struct hns_roce_dca_ctx *ctx = NULL; struct hns_roce_dev *hr_dev = NULL; u32 page_idx, page_ofs; int ret; if (IS_ERR(hr_qp)) return PTR_ERR(hr_qp); hr_dev = to_hr_dev(hr_qp->ibqp.device); ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); if (!ctx) return -ENOENT; ret = uverbs_copy_from(&page_idx, attrs, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX); if (ret) return ret; active_attr.buf_id = hr_qp->dca_cfg.buf_id; active_attr.start_index = page_idx; travel_dca_pages(ctx, &active_attr, query_dca_active_pages_proc); page_ofs = active_attr.page_index << HNS_HW_PAGE_SHIFT; if (!active_attr.page_count) return -ENOMEM; ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, &active_attr.mem_key, sizeof(active_attr.mem_key)); if (!ret) ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, &page_ofs, sizeof(page_ofs)); if (!ret) ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, &active_attr.page_count, sizeof(active_attr.page_count)); return ret; } DECLARE_UVERBS_NAMED_METHOD( HNS_IB_METHOD_DCA_MEM_QUERY, UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, UVERBS_OBJECT_QP, UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, UVERBS_ATTR_TYPE(u64), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, UVERBS_ATTR_TYPE(u32), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM, 
UVERBS_TYPE_ALLOC_IDR(dca_cleanup), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_ATTACH), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DETACH), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_QUERY)); static bool dca_is_supported(struct ib_device *device) { struct hns_roce_dev *dev = to_hr_dev(device); return dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE; } const struct uapi_definition hns_roce_dca_uapi_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( HNS_IB_OBJECT_DCA_MEM, UAPI_DEF_IS_OBJ_SUPPORTED(dca_is_supported)), {} }; #else const struct uapi_definition hns_roce_dca_uapi_defs[] = { }; #endif /* enum DCA pool */ struct dca_mem_enum_attr { void *param; hns_dca_enum_callback enum_fn; }; static int enum_dca_pool_proc(struct dca_mem *mem, int index, void *param) { struct dca_mem_enum_attr *attr = param; int ret; ret = attr->enum_fn(mem->states, mem->page_count, attr->param); return ret ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; } void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, hns_dca_enum_callback cb) { struct dca_mem_enum_attr attr; attr.enum_fn = cb; attr.param = param; travel_dca_pages(dca_ctx, &attr, enum_dca_pool_proc); } module_param(dca_unit_size, uint, 0444); module_param(dca_max_size, ulong, 0444); module_param(dca_min_size, ulong, 0444);
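
/*
 * Editorial addition: parameter descriptions summarising load_kdca_param().
 * A dca_unit_size of 0 disables kernel DCA; a dca_max_size of 0 (or the
 * default ~0UL) leaves the pool size unlimited; dca_min_size is the amount
 * kept back when unused kernel DCA mems are shrunk. The wording below is an
 * assumption derived from that function, not taken from the original file.
 */
MODULE_PARM_DESC(dca_unit_size,
		 "Unit size (bytes, rounded up to PAGE_SIZE) of kernel DCA allocations, 0 disables kernel DCA");
MODULE_PARM_DESC(dca_max_size,
		 "Upper limit (bytes) of the kernel DCA pool, 0 means no limit");
MODULE_PARM_DESC(dca_min_size,
		 "Size (bytes) of kernel DCA memory reserved when shrinking the pool");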