2026-01-29 22:25:33 +08:00

343 lines
7.7 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 - 2023, Shanghai Yunsilicon Technology Co., Ltd.
* All rights reserved.
*/
#include <linux/module.h>
#include <rdma/ib_umem.h>
#include "xsc_ib.h"
/* Sentinel returned by xsc_count_trailing_zeros() when no bit is set. */
#define COUNT_TRAILING_ZEROS_0 (-1)

/*
 * xsc_count_trailing_zeros - number of zero bits below the lowest set bit
 * @x: word to inspect
 *
 * Return: zero-based index of the least significant set bit of @x, or
 * COUNT_TRAILING_ZEROS_0 when @x is 0.
 *
 * __ffs() is used for every word size so the result is always zero-based;
 * ffs() is one-based and must not be mixed in here.  __ffs() is undefined
 * for 0, hence the explicit check.
 */
static inline int xsc_count_trailing_zeros(unsigned long x)
{
	if (!x)
		return COUNT_TRAILING_ZEROS_0;

	return (int)__ffs(x);
}
/*
 * xsc_find_chunk_cont_0 - alignment (in trailing zero bits) of a chunk
 * @chunk: chunk whose pa/va/length are examined
 * @is_first: non-zero if the start of the chunk must not be considered
 * @is_last: non-zero if the end of the chunk must not be considered
 *
 * For every boundary that is considered, the trailing-zero count of both
 * the physical and the virtual address is taken and the smaller one kept.
 * A boundary that is not considered contributes the number of bits in an
 * int.
 *
 * Return: the smaller of the start and end boundary results.
 */
int xsc_find_chunk_cont_0(struct xsc_pa_chunk *chunk,
			  int is_first,
			  int is_last)
{
	static const int max_count = sizeof(int) << 3;
	dma_addr_t start_pa = chunk->pa;
	u64 start_va = chunk->va;
	size_t bytes = chunk->length;
	int head_zeros = max_count;
	int tail_zeros = max_count;

	if (!is_first) {
		int pa_zeros = xsc_count_trailing_zeros((unsigned long)start_pa);
		int va_zeros = xsc_count_trailing_zeros(start_va);

		head_zeros = min_t(int, pa_zeros, va_zeros);
	}

	if (!is_last) {
		dma_addr_t stop_pa = start_pa + bytes;
		u64 stop_va = start_va + bytes;
		int pa_zeros = xsc_count_trailing_zeros((unsigned long)stop_pa);
		int va_zeros = xsc_count_trailing_zeros(stop_va);

		tail_zeros = min_t(int, pa_zeros, va_zeros);
	}

	if (head_zeros > tail_zeros)
		return tail_zeros;
	return head_zeros;
}
/*
 * xsc_find_best_pgsz - pick a page size for a umem and list its pages
 * @umem: umem whose DMA-mapped scatterlist is walked
 * @pgsz_bitmap: bitmap of candidate page sizes (bit n set == 2^n bytes)
 * @virt: virtual address the region starts at
 * @npages: out, number of entries stored in *@pas
 * @shift: out, log2 of the selected page size
 * @pas: out, array of page addresses allocated with vmalloc(); it is not
 *       freed by this function
 *
 * The scatterlist entries are merged into chunks of contiguous DMA
 * addresses.  The smallest boundary alignment reported by
 * xsc_find_chunk_cont_0() over all chunks limits which bits of
 * @pgsz_bitmap remain usable; the largest remaining size is selected and
 * every chunk is split into pages of that size.
 *
 * Return: 0 on success, -ENOMEM if an allocation fails, -EINVAL if no
 * page size in @pgsz_bitmap is compatible with the chunk alignment.
 */
int xsc_find_best_pgsz(struct ib_umem *umem,
		       unsigned long pgsz_bitmap,
		       unsigned long virt,
		       int *npages,
		       int *shift,
		       u64 **pas)
{
	struct scatterlist *sg;
	unsigned long va;
	dma_addr_t pa;
	struct xsc_pa_chunk *chunk, *tmp;
	struct list_head chunk_list;
	int i;
	int chunk_cnt;
	int min_count_0 = sizeof(int) << 3;
	int count_0;
	int is_first = 0, is_end = 0;
	size_t pgsz;
	u64 mask;
	u64 head_off;
	unsigned int sg_len;
	int err = 0;
	int pa_index;
	u64 chunk_pa;
	int chunk_npages;
	unsigned long page_shift = PAGE_SHIFT;

	/* Walk virtual addresses from the page containing @virt. */
	va = (virt >> page_shift) << page_shift;

	INIT_LIST_HEAD(&chunk_list);
	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk) {
		err = -ENOMEM;
		goto err_alloc;
	}
	list_add_tail(&chunk->list, &chunk_list);
	chunk_cnt = 1;

	/* Merge sg entries with back-to-back DMA addresses into chunks. */
	for_each_sg(umem->sgt_append.sgt.sgl, sg, umem->sgt_append.sgt.nents, i) {
		pa = sg_dma_address(sg);
		sg_len = sg_dma_len(sg);

		if (i == 0) {
			chunk->va = va;
			chunk->pa = pa;
			chunk->length = sg_len;
		} else if (pa == chunk->pa + chunk->length) {
			chunk->length += sg_len;
		} else {
			chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
			if (!chunk) {
				err = -ENOMEM;
				goto err_alloc;
			}
			chunk->va = va;
			chunk->pa = pa;
			chunk->length = sg_len;
			list_add_tail(&chunk->list, &chunk_list);
			chunk_cnt++;
		}

		/*
		 * Advance by the length of this sg entry only; adding the
		 * accumulated chunk length would skew the virtual address
		 * recorded for every following chunk.
		 */
		va += sg_len;
	}

	/* The least aligned chunk boundary bounds the usable page size. */
	i = 0;
	list_for_each_entry(chunk, &chunk_list, list) {
		is_first = (i == 0 ? 1 : 0);
		is_end = (i == chunk_cnt - 1 ? 1 : 0);
		count_0 = xsc_find_chunk_cont_0(chunk, is_first, is_end);
		if (count_0 < min_count_0)
			min_count_0 = count_0;
		i++;
	}

	/*
	 * A negative count is the "no bit set" sentinel of the trailing-zero
	 * helper; no alignment can be derived from it.
	 */
	if (min_count_0 < 0) {
		err = -EINVAL;
		goto err_alloc;
	}
	/* GENMASK() is only defined for bit positions inside the word. */
	min_count_0 = min_t(int, min_count_0, BITS_PER_LONG - 1);
	pgsz_bitmap &= GENMASK(min_count_0, 0);
	if (!pgsz_bitmap) {
		/* rounddown_pow_of_two()/ilog2() are undefined for 0 */
		err = -EINVAL;
		goto err_alloc;
	}

	pgsz = rounddown_pow_of_two(pgsz_bitmap);
	*shift = ilog2(pgsz);
	*npages = 0;

	/*
	 * Offset of @virt inside the first page of the selected size; the
	 * first page address is moved back by this amount.
	 * NOTE(review): when *shift <= PAGE_SHIFT this mask selects the single
	 * bit (*shift - 1) rather than being empty - confirm this is intended.
	 */
	mask = GENMASK(*shift - 1, min_t(int, page_shift, *shift - 1));
	head_off = virt & mask;

	if (chunk_cnt == 1) {
		chunk = list_first_entry(&chunk_list, struct xsc_pa_chunk, list);

		*npages = DIV_ROUND_UP(chunk->length + head_off, pgsz);
		*pas = vmalloc(*npages * sizeof(u64));
		if (!*pas) {
			err = -ENOMEM;
			goto err_alloc;
		}

		chunk_pa = chunk->pa - head_off;
		for (i = 0; i < *npages; i++)
			(*pas)[i] = chunk_pa + i * pgsz;
	} else {
		list_for_each_entry(chunk, &chunk_list, list)
			*npages += DIV_ROUND_UP(chunk->length, pgsz);

		*pas = vmalloc(*npages * sizeof(u64));
		if (!*pas) {
			err = -ENOMEM;
			goto err_alloc;
		}

		pa_index = 0;
		list_for_each_entry(chunk, &chunk_list, list) {
			chunk_npages = DIV_ROUND_UP(chunk->length, pgsz);
			chunk_pa = chunk->pa;
			/* Only the chunk that supplies entry 0 is shifted back. */
			if (pa_index == 0)
				chunk_pa -= head_off;
			for (i = 0; i < chunk_npages; i++)
				(*pas)[pa_index++] = chunk_pa + i * pgsz;
		}
	}

err_alloc:
	/* The chunk list is scratch data; release it on every path. */
	list_for_each_entry_safe(chunk, tmp, &chunk_list, list) {
		list_del(&chunk->list);
		kfree(chunk);
	}
	return err;
}
/* Find the largest compound page size usable for a umem.
 *
 * @umem: umem object to scan
 * @addr: ib virtual address requested by the user
 * @max_page_shift: upper bound for the resulting page shift, 0 for no bound
 * @count: number of PAGE_SIZE pages covered by umem
 * @shift: page shift for the compound pages found in the region
 * @ncont: number of compound pages
 * @order: log2 of the number of compound pages (may be NULL)
 */
void __xsc_ib_cont_pages(struct ib_umem *umem, u64 addr,
			 unsigned long max_page_shift,
			 int *count, int *shift,
			 int *ncont, int *order)
{
	unsigned long tmp;
	unsigned long m;
	u64 base = ~0, p = 0;
	u64 len, pfn;
	int i = 0;
	struct scatterlist *sg;
	int entry;
	unsigned long page_shift = PAGE_SHIFT;

	/* Start from the alignment of the requested address, in pages. */
	addr = addr >> page_shift;
	tmp = (unsigned long)addr;
	m = find_first_bit(&tmp, BITS_PER_LONG);
	if (max_page_shift)
		m = min_t(unsigned long, max_page_shift - page_shift, m);

	for_each_sg(umem->sgt_append.sgt.sgl, sg, umem->sgt_append.sgt.nents, entry) {
		len = sg_dma_len(sg) >> page_shift;
		pfn = sg_dma_address(sg) >> page_shift;
		if (base + p != pfn) {
			/* If either the offset or the new
			 * base are unaligned update m
			 */
			tmp = (unsigned long)(pfn | p);
			/*
			 * m equals BITS_PER_LONG when no bit was found so far;
			 * shifting by the full word width is undefined, so
			 * handle that case without building the mask.
			 */
			if (m >= BITS_PER_LONG || !IS_ALIGNED(tmp, 1UL << m))
				m = find_first_bit(&tmp, BITS_PER_LONG);
			base = pfn;
			p = 0;
		}
		p += len;
		i += len;
	}

	if (i) {
		/* A compound page cannot be larger than the whole region. */
		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
		if (order)
			*order = ilog2(roundup_pow_of_two(i) >> m);
		*ncont = DIV_ROUND_UP(i, (1UL << m));
	} else {
		m = 0;
		if (order)
			*order = 0;
		*ncont = 0;
	}
	*shift = page_shift + m;
	*count = i;
}
/*
 * xsc_ib_cont_pages - __xsc_ib_cont_pages() without a page shift limit
 *
 * Forwards all arguments unchanged and passes 0 as max_page_shift.
 */
void xsc_ib_cont_pages(struct ib_umem *umem, u64 addr,
		       int *count, int *shift,
		       int *ncont, int *order)
{
	/* 0 means "no upper bound" to __xsc_ib_cont_pages() */
	const unsigned long unlimited_shift = 0;

	__xsc_ib_cont_pages(umem, addr, unlimited_shift,
			    count, shift, ncont, order);
}
/*
 * __xsc_ib_populate_pas - fill a page address array from a umem
 * @dev: device, used for debug output only
 * @umem: umem whose DMA-mapped scatterlist is walked
 * @page_shift: log2 of the page size each @pas entry describes
 * @offset: number of leading @pas-sized pages to skip
 * @num_pages: number of entries to write
 * @pas: output array; entries are stored big endian
 * @access_flags: bits OR-ed into every address before it is stored
 * @need_to_devide: walk each sg entry in PAGE_SHIFT_4K units instead of
 *                  PAGE_SHIFT units
 *
 * NOTE(review): @shift below is always relative to PAGE_SHIFT, also when
 * the walk runs in PAGE_SHIFT_4K units; the two only agree when
 * PAGE_SHIFT == PAGE_SHIFT_4K - confirm against the callers.
 */
void __xsc_ib_populate_pas(struct xsc_ib_dev *dev, struct ib_umem *umem,
			   int page_shift, size_t offset, size_t num_pages,
			   __be64 *pas, int access_flags, bool need_to_devide)
{
	unsigned long umem_page_shift = PAGE_SHIFT;
	/* log2 of the unit in which every sg entry is stepped through */
	unsigned long unit_shift = need_to_devide ? PAGE_SHIFT_4K : umem_page_shift;
	int shift = page_shift - umem_page_shift;
	int mask = (1 << shift) - 1;
	int i = 0;
	int k, idx;
	u64 cur = 0;
	u64 base;
	int len;
	struct scatterlist *sg;
	int entry;

	for_each_sg(umem->sgt_append.sgt.sgl, sg, umem->sgt_append.sgt.nents, entry) {
		len = sg_dma_len(sg) >> unit_shift;
		base = sg_dma_address(sg);

		/* Skip elements below offset */
		if (i + len < offset << shift) {
			i += len;
			continue;
		}

		/* Skip pages below offset */
		if (i < offset << shift) {
			k = (offset << shift) - i;
			i = offset << shift;
		} else {
			k = 0;
		}

		for (; k < len; k++) {
			/* Emit one entry at the start of every output page. */
			if (!(i & mask)) {
				/*
				 * Widen before shifting: the byte offset of a
				 * large sg entry does not fit in an int.
				 */
				cur = base + ((u64)k << unit_shift);
				cur |= access_flags;
				idx = (i >> shift) - offset;
				pas[idx] = cpu_to_be64(cur);
				xsc_ib_dbg(dev, "pas[%d] 0x%llx\n",
					   i >> shift, be64_to_cpu(pas[idx]));
			}
			i++;

			/* Stop after num_pages reached */
			if (i >> shift >= offset + num_pages)
				return;
		}
	}
}
/*
 * xsc_ib_populate_pas - fill @pas starting at the first page of @umem
 *
 * Calls __xsc_ib_populate_pas() with an offset of 0 and no access flags.
 */
void xsc_ib_populate_pas(struct xsc_ib_dev *dev, struct ib_umem *umem,
			 int page_shift, __be64 *pas, int npages, bool need_to_devide)
{
	/* No "return <expr>" here: both functions return void. */
	__xsc_ib_populate_pas(dev, umem, page_shift, 0,
			      npages, pas, 0, need_to_devide);
}
/*
 * xsc_ib_get_buf_offset - offset of a buffer inside its page, in 1/64 pages
 * @addr: buffer address
 * @page_shift: log2 of the page size
 * @offset: out, offset of @addr within its page in units of page_size / 64
 *
 * Return: 0 on success; -EINVAL if @addr is not aligned to page_size / 64
 * or if @page_shift is outside the range this computation supports.
 */
int xsc_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
{
	u64 page_size;
	u64 page_mask;
	u64 off_size;
	u64 off_mask;
	u64 buf_off;

	/*
	 * Below 6 the offset unit (page_size >> 6) would be 0 and ilog2(0)
	 * is undefined; 64 and above cannot be shifted in a u64.
	 */
	if (page_shift < 6 || page_shift >= 64)
		return -EINVAL;

	/* 64-bit constant: a plain int shift overflows for large shifts. */
	page_size = 1ULL << page_shift;
	page_mask = page_size - 1;
	buf_off = addr & page_mask;
	off_size = page_size >> 6;
	off_mask = off_size - 1;

	if (buf_off & off_mask)
		return -EINVAL;

	*offset = buf_off >> ilog2(off_size);
	return 0;
}