// SPDX-License-Identifier: GPL-2.0 /* * iommu.c: Generic sw64 IOMMU support * * This is designed and tested for 3231. If there are no changes in hardware * in later chips, then it should work just as well. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sunway_iommu.h" #define MAX_DOMAIN_NUM 65536 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define SW64_DMA_LIMIT (0xe0000000 - 1) #define SW64_BAR_ADDRESS (IO_BASE | PCI_BASE) #define SW64_IOMMU_LEVEL1_OFFSET 0x1ff #define SW64_IOMMU_LEVEL2_OFFSET 0x3ff #define SW64_IOMMU_GRN_8K ((0UL) << 4) /* page size as 8KB */ #define SW64_IOMMU_GRN_8M ((0x2UL) << 4) /* page size as 8MB */ #define SW64_IOMMU_PGSIZES (((1ULL) << PAGE_SHIFT) | ((1ULL) << PAGE_8M_SHIFT)) #define IDENTMAP_ALL ((1U) << 0) #define DMA_MASK64 ((1U) << 1) /* IOMMU Exceptional Status */ enum exceptype { DTE_LEVEL1 = 0x0, DTE_LEVEL2, PTE_LEVEL1, PTE_LEVEL2, UNAUTHORIZED_ACCESS, ILLEGAL_RESPONSE, DTE_LEVEL1_VAL, DTE_LEVEL2_VAL, PTE_LEVEL1_VAL, PTE_LEVEL2_VAL, }; u64 iommu_enable_cmd; /* default IOMMU boot param: 0 */ unsigned long *sunway_iommu_domain_bitmap; static DEFINE_SPINLOCK(domain_bitmap_lock); static DEFINE_SPINLOCK(sunway_iommu_device_table_lock); spinlock_t sunway_domain_lock; static LLIST_HEAD(dev_data_list); LIST_HEAD(sunway_domain_list); struct dma_domain { struct sunway_iommu_domain sdomain; struct iova_domain iovad; }; const struct iommu_ops sunway_iommu_ops; static int iommu_identity_mapping; /* flush helpers */ static void piu_flush_all(struct pci_controller *hose) { write_piu_ior0(hose->node, hose->index, DTLB_FLUSHALL, 0); write_piu_ior0(hose->node, hose->index, PTLB_FLUSHALL, 0); write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHALL, 0); } void dev_flush_dtlb(struct sunway_iommu_domain *sdomain, struct sunway_iommu_dev *sdev_data) { struct pci_controller *hose; int 
devid; list_for_each_entry(sdev_data, &sdomain->dev_list, list) { hose = pci_bus_to_pci_controller(sdev_data->pdev->bus); devid = sdev_data->devid; write_piu_ior0(hose->node, hose->index, DTLB_FLUSHDEV, devid); } } void flush_pcache_by_addr(struct sunway_iommu_domain *sdomain, unsigned long flush_addr) { struct pci_controller *hose; struct sunway_iommu_dev *sdev_data; list_for_each_entry(sdev_data, &sdomain->dev_list, list) { hose = pci_bus_to_pci_controller(sdev_data->pdev->bus); flush_addr = __pa(flush_addr); write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHPADDR, flush_addr); } } void flush_ptlb_by_addr(struct sunway_iommu_domain *sdomain, unsigned long flush_addr) { struct pci_controller *hose; struct pci_dev *pdev; struct sunway_iommu_dev *sdev_data; list_for_each_entry(sdev_data, &sdomain->dev_list, list) { pdev = sdev_data->pdev; hose = pci_bus_to_pci_controller(pdev->bus); flush_addr = (pdev->bus->number << 8) | pdev->devfn | (flush_addr << 16); write_piu_ior0(hose->node, hose->index, PTLB_FLUSHVADDR, flush_addr); } } /* domain helpers */ static struct sunway_iommu_domain *to_sunway_domain(struct iommu_domain *dom) { return container_of(dom, struct sunway_iommu_domain, domain); } static struct dma_domain *to_dma_domain(struct sunway_iommu_domain *sdomain) { return container_of(sdomain, struct dma_domain, sdomain); } static void add_domain_to_list(struct sunway_iommu_domain *sdomain) { unsigned long flags; spin_lock_irqsave(&sunway_domain_lock, flags); list_add(&sdomain->list, &sunway_domain_list); spin_unlock_irqrestore(&sunway_domain_lock, flags); } static void del_domain_from_list(struct sunway_iommu_domain *sdomain) { unsigned long flags; spin_lock_irqsave(&sunway_domain_lock, flags); list_del(&sdomain->list); spin_unlock_irqrestore(&sunway_domain_lock, flags); } static void free_pagetable(struct sunway_iommu_domain *sdomain) { unsigned long pde; unsigned long *pde_ptr; int i, pdes_one_page; pde_ptr = sdomain->pt_root; if (!pde_ptr) return; 
pdes_one_page = PAGE_SIZE/sizeof(pde); for (i = 0; i < pdes_one_page; i++, pde_ptr++) { pde = *pde_ptr; if ((pde & SW64_IOMMU_ENTRY_VALID) == 0) continue; pde &= ~(SW64_IOMMU_ENTRY_VALID) & PAGE_MASK; pde |= PAGE_OFFSET; free_page(pde); } free_page((unsigned long)sdomain->pt_root); } static void domain_id_free(int id) { spin_lock(&domain_bitmap_lock); if (id > 0) __clear_bit(id, sunway_iommu_domain_bitmap); spin_unlock(&domain_bitmap_lock); } static void dma_domain_free(struct dma_domain *dma_dom) { if (!dma_dom) return; del_domain_from_list(&dma_dom->sdomain); put_iova_domain(&dma_dom->iovad); free_pagetable(&dma_dom->sdomain); if (dma_dom->sdomain.id) domain_id_free(dma_dom->sdomain.id); kfree(dma_dom); } static void sunway_domain_free(struct sunway_iommu_domain *sdomain) { if (!sdomain) return; del_domain_from_list(sdomain); if (sdomain->id) domain_id_free(sdomain->id); kfree(sdomain); } static u16 sunway_domain_id_alloc(void) { int id; spin_lock(&domain_bitmap_lock); id = find_first_zero_bit(sunway_iommu_domain_bitmap, MAX_DOMAIN_NUM); if (id > 0 && id < MAX_DOMAIN_NUM) __set_bit(id, sunway_iommu_domain_bitmap); else id = 0; spin_unlock(&domain_bitmap_lock); return id; } static int sunway_domain_init(struct sunway_iommu_domain *sdomain) { spin_lock_init(&sdomain->lock); mutex_init(&sdomain->api_lock); sdomain->id = sunway_domain_id_alloc(); if (!sdomain->id) return -ENOMEM; INIT_LIST_HEAD(&sdomain->dev_list); return 1; } static struct sunway_iommu_domain *sunway_domain_alloc(void) { struct sunway_iommu_domain *sdomain; sdomain = kzalloc(sizeof(struct sunway_iommu_domain), GFP_KERNEL); if (!sdomain) return NULL; if (!sunway_domain_init(sdomain)) { kfree(sdomain); return NULL; } add_domain_to_list(sdomain); return sdomain; } static struct dma_domain *dma_domain_alloc(void) { struct dma_domain *dma_dom; struct page; dma_dom = kzalloc(sizeof(struct dma_domain), GFP_KERNEL); if (!dma_dom) return NULL; sunway_domain_init(&dma_dom->sdomain); dma_dom->sdomain.type = 
IOMMU_DOMAIN_DMA; dma_dom->sdomain.pt_root = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (dma_dom->sdomain.pt_root == NULL) { pr_err("Allocating a new sdomain pt_root failed!\n"); dma_domain_free(dma_dom); return NULL; } add_domain_to_list(&dma_dom->sdomain); return dma_dom; } static void device_flush_all(struct sunway_iommu_dev *sdata) { struct pci_controller *hose = pci_bus_to_pci_controller(sdata->pdev->bus); if (hose == NULL) return; write_piu_ior0(hose->node, hose->index, DTLB_FLUSHDEV, sdata->devid); write_piu_ior0(hose->node, hose->index, PTLB_FLUSHDEV, sdata->devid); write_piu_ior0(hose->node, hose->index, PCACHE_FLUSHDEV, sdata->devid); } /* iommu_ops device attach/unattach helpers */ static void set_dte_entry(struct sunway_iommu_dev *sdev, struct sunway_iommu_domain *sdomain) { struct sunway_iommu *iommu; struct pci_dev *pdev; struct page *page; unsigned long *dte_l1, *dte_l2; unsigned long dte_l1_val, dte_l2_base, dte_l2_val; pdev = sdev->pdev; if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) return; sdev->devid = PCI_DEVID(pdev->bus->number, pdev->devfn); iommu = sdev->iommu; dte_l1 = iommu->iommu_dtbr + (pdev->bus->number); dte_l1_val = *dte_l1; if (!dte_l1_val) { /* Alloc a new level-2 device table page */ page = alloc_pages_node(iommu->node, __GFP_ZERO, get_order(PAGE_SIZE)); if (!page) { pr_err("Allocating a new level-2 device table page failed.\n"); return; } dte_l2_base = (unsigned long)page_address(page); dte_l1_val = (__pa(dte_l2_base) & PAGE_MASK) | SW64_IOMMU_ENTRY_VALID; *dte_l1 = dte_l1_val; } dte_l2 = __va(dte_l1_val & ~(SW64_IOMMU_ENTRY_VALID) & PAGE_MASK) + (pdev->devfn << 3); dte_l2_val = (__pa(sdomain->pt_root) & PAGE_MASK) | SW64_IOMMU_ENTRY_VALID; if (iommu_identity_mapping) { dte_l2_val |= 0x1; sdev->passthrough = IDENTMAP_ALL; } *dte_l2 = dte_l2_val; device_flush_all(sdev); } static void do_attach(struct sunway_iommu_dev *sdev_data, struct sunway_iommu_domain *sdomain) { sdev_data->domain = sdomain; list_add(&sdev_data->list, 
&sdomain->dev_list); sdomain->dev_cnt++; set_dte_entry(sdev_data, sdomain); pr_debug("iommu: device %d add to domain: %d\n", sdev_data->devid, sdomain->id); } static void do_detach(struct sunway_iommu_dev *sdev_data) { struct sunway_iommu_domain *sdomain = sdev_data->domain; sdev_data->domain = NULL; list_del(&sdev_data->list); device_flush_all(sdev_data); sdomain->dev_cnt--; pr_debug("iommu: device %d detached from domain %d\n", sdev_data->devid, sdomain->id); } static int __attach_device(struct sunway_iommu_dev *sdev_data, struct sunway_iommu_domain *sdomain) { int ret; spin_lock(&sdomain->lock); ret = -EBUSY; if (sdev_data->domain != NULL) goto out_unlock; do_attach(sdev_data, sdomain); ret = 0; out_unlock: spin_unlock(&sdomain->lock); return ret; } static void __detach_device(struct sunway_iommu_dev *sunway_dev_data) { struct sunway_iommu_domain *domain; domain = sunway_dev_data->domain; spin_lock(&domain->lock); do_detach(sunway_dev_data); spin_unlock(&domain->lock); } static int attach_device(struct device *dev, struct sunway_iommu_domain *sdomain) { struct sunway_iommu_dev *sdev; unsigned long flags; int ret; sdev = dev_iommu_priv_get(dev); spin_lock_irqsave(&sunway_iommu_device_table_lock, flags); ret = __attach_device(sdev, sdomain); spin_unlock_irqrestore(&sunway_iommu_device_table_lock, flags); return ret; } static void detach_device(struct device *dev) { struct sunway_iommu_domain *sunway_domain; struct sunway_iommu_dev *sdev_data; unsigned long flags; sdev_data = dev_iommu_priv_get(dev); sunway_domain = sdev_data->domain; if (WARN_ON(!sdev_data->domain)) return; spin_lock_irqsave(&sunway_iommu_device_table_lock, flags); __detach_device(sdev_data); spin_unlock_irqrestore(&sunway_iommu_device_table_lock, flags); if (!dev_is_pci(dev)) return; } static struct sunway_iommu_dev *search_dev_data(u16 devid) { struct sunway_iommu_dev *sdev_data; struct llist_node *node; if (llist_empty(&dev_data_list)) return NULL; node = dev_data_list.first; 
llist_for_each_entry(sdev_data, node, dev_data_list) { if (sdev_data->devid == devid) return sdev_data; } return NULL; } /********************************************************************** * * Following functions describe IOMMU init ops * **********************************************************************/ static struct sunway_iommu *sunway_iommu_early_init(struct pci_controller *hose) { struct sunway_iommu *iommu; struct page *page; unsigned long base; hose->pci_iommu = kzalloc(sizeof(struct sunway_iommu), GFP_KERNEL); if (!hose->pci_iommu) return 0; iommu = hose->pci_iommu; spin_lock_init(&iommu->dt_lock); iommu->node = hose->node; if (!node_online(hose->node)) iommu->node = -1; page = alloc_pages_node(iommu->node, __GFP_ZERO, get_order(PAGE_SIZE)); if (!page) { pr_err("Allocating a new iommu_dtbr page failed.\n"); kfree(hose->pci_iommu); return NULL; } iommu->iommu_dtbr = page_address(page); iommu->hose_pt = hose; iommu->index = hose->index; iommu->enabled = true; base = __pa(iommu->iommu_dtbr) & PAGE_MASK; write_piu_ior0(hose->node, hose->index, DTBASEADDR, base); return iommu; } unsigned long fetch_dte(struct sunway_iommu *iommu, unsigned long devid, enum exceptype type) { unsigned long *dte_l1, *dte_l2; unsigned long dte_l1_val, dte_l2_val; if (!iommu) return 0; dte_l1 = iommu->iommu_dtbr + (devid >> 8); if (type == DTE_LEVEL1) return (unsigned long)dte_l1; dte_l1_val = *dte_l1; if (type == DTE_LEVEL1_VAL) return dte_l1_val; dte_l1_val &= (~(SW64_IOMMU_ENTRY_VALID)) & (PAGE_MASK); dte_l1_val |= PAGE_OFFSET; dte_l2 = (unsigned long *)(dte_l1_val + ((devid & 0xff) << 3)); if (type == DTE_LEVEL2) return (unsigned long)dte_l2; dte_l2_val = *dte_l2; if (type == DTE_LEVEL2_VAL) return dte_l2_val; return dte_l2_val; } unsigned long fetch_pte(struct sunway_iommu_domain *sdomain, dma_addr_t iova, enum exceptype type) { unsigned long iova_pfn, pte_l1_val, pte_l2_val; unsigned long *pte_l1, *pte_l2; unsigned long pte_root; unsigned long offset; if (!sdomain) 
return -EINVAL; pte_root = __pa(sdomain->pt_root) & PAGE_MASK; iova_pfn = iova >> PAGE_SHIFT; pte_root = ((pte_root) & (~(SW64_IOMMU_ENTRY_VALID)) & (PAGE_MASK)); pte_root |= PAGE_OFFSET; offset = ((iova_pfn >> 10) & SW64_IOMMU_LEVEL1_OFFSET) << 3; pte_l1 = (unsigned long *)(pte_root + offset); if (type == PTE_LEVEL1) return (unsigned long)pte_l1; pte_l1_val = *pte_l1; if (type == PTE_LEVEL1_VAL) return pte_l1_val; pte_l1_val &= (~(SW64_IOMMU_ENTRY_VALID)) & (PAGE_MASK); pte_l1_val |= PAGE_OFFSET; offset = (iova_pfn & SW64_IOMMU_LEVEL2_OFFSET) << 3; pte_l2 = (unsigned long *)(pte_l1_val + offset); if (type == PTE_LEVEL2) return (unsigned long)pte_l2; pte_l2_val = *pte_l2; if (type == PTE_LEVEL2_VAL) return pte_l2_val; return pte_l2_val; } /* IOMMU Interrupt handle */ irqreturn_t iommu_interrupt(int irq, void *dev) { struct pci_controller *hose = (struct pci_controller *)dev; struct sunway_iommu_domain *sdomain; struct sunway_iommu_dev *sdev; unsigned long iommu_status; unsigned long type; unsigned long devid, dva; iommu_status = read_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS); if (!(iommu_status >> 63)) return IRQ_NONE; type = (iommu_status >> 59) & 0x7; devid = (iommu_status >> 37) & 0xffff; dva = iommu_status & 0xffffffff; pr_info("%s, iommu_status = %#lx, devid %#lx, dva %#lx, ", __func__, iommu_status, devid, dva); sdev = search_dev_data(devid); if (sdev == NULL) { pr_info("no such dev!!!\n"); iommu_status &= ~(1UL << 62); write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS, iommu_status); return IRQ_HANDLED; } sdomain = sdev->domain; switch (type) { case DTE_LEVEL1: pr_info("invalid level1 dte, addr:%#lx, val:%#lx\n", fetch_dte(hose->pci_iommu, devid, DTE_LEVEL1), fetch_dte(hose->pci_iommu, devid, DTE_LEVEL1_VAL)); break; case DTE_LEVEL2: pr_info("invalid level2 dte, addr:%#lx, val:%#lx\n", fetch_dte(hose->pci_iommu, devid, DTE_LEVEL2), fetch_dte(hose->pci_iommu, devid, DTE_LEVEL2_VAL)); break; case PTE_LEVEL1: pr_info("invalid level1 pte, 
addr: %#lx, val:%#lx\n", fetch_pte(sdomain, dva, PTE_LEVEL1), fetch_pte(sdomain, dva, PTE_LEVEL1_VAL)); break; case PTE_LEVEL2: pr_info("invalid level2 pte, addr: %#lx, val: %#lx\n", fetch_pte(sdomain, dva, PTE_LEVEL2), fetch_pte(sdomain, dva, PTE_LEVEL2_VAL)); iommu_status &= ~(1UL << 62); write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS, iommu_status); break; case UNAUTHORIZED_ACCESS: pr_info("unauthorized access\n"); break; case ILLEGAL_RESPONSE: pr_info("illegal response\n"); break; default: pr_info("unknown error\n"); break; } return IRQ_HANDLED; } struct irqaction iommu_irqaction = { .handler = iommu_interrupt, .flags = IRQF_SHARED | IRQF_NO_THREAD, .name = "sunway_iommu", }; void sunway_enable_iommu_func(struct pci_controller *hose) { unsigned int iommu_irq, err; unsigned long iommu_conf, iommu_ctrl; iommu_irq = hose->int_irq; pr_debug("%s node %ld rc %ld iommu_irq %d\n", __func__, hose->node, hose->index, iommu_irq); err = request_irq(iommu_irq, iommu_interrupt, IRQF_SHARED, "sunway_iommu", hose); if (err < 0) pr_info("sw iommu request irq failed!\n"); iommu_ctrl = (1UL << 63) | (0x100UL << 10); write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_CTRL, iommu_ctrl); iommu_conf = read_piu_ior0(hose->node, hose->index, PIUCONFIG0); iommu_conf = iommu_conf | (0x3 << 7); write_piu_ior0(hose->node, hose->index, PIUCONFIG0, iommu_conf); write_piu_ior0(hose->node, hose->index, TIMEOUT_CONFIG, 0xf); iommu_conf = read_piu_ior0(hose->node, hose->index, PIUCONFIG0); pr_debug("SW arch configure node %ld hose-%ld iommu_conf = %#lx\n", hose->node, hose->index, iommu_conf); } static bool is_iommu_enable(struct pci_controller *hose) { u64 rc_mask = 0x1; rc_mask <<= (8 * hose->node + hose->index); if (iommu_enable_cmd & rc_mask) return true; return false; } /* iommu cpu syscore ops */ static int iommu_cpu_suspend(void) { return 0; } static void iommu_cpu_resume(void) { } struct syscore_ops iommu_cpu_syscore_ops = { .suspend = iommu_cpu_suspend, .resume = 
iommu_cpu_resume, }; static struct iommu_domain *sunway_iommu_domain_alloc(unsigned int type); static int sunway_iommu_init(void) { struct pci_controller *hose; struct sunway_iommu *iommu; int ret; int iommu_index = 0; sunway_iommu_domain_bitmap = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_NUM / 8)); if (sunway_iommu_domain_bitmap == NULL) return 0; __set_bit(0, sunway_iommu_domain_bitmap); /* Do the loop */ for (hose = hose_head; hose; hose = hose->next) { if (!is_iommu_enable(hose)) { hose->iommu_enable = false; continue; } iommu = sunway_iommu_early_init(hose); if (!iommu) { pr_err("Allocating sunway_iommu failed\n"); hose->iommu_enable = false; continue; } iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "%d", iommu_index); iommu_index++; sunway_enable_iommu_func(hose); hose->iommu_enable = true; iommu_device_register(&iommu->iommu, &sunway_iommu_ops, NULL); } ret = iova_cache_get(); if (ret) return ret; for (hose = hose_head; hose; hose = hose->next) if (hose->iommu_enable) piu_flush_all(hose); register_syscore_ops(&iommu_cpu_syscore_ops); return 1; } device_initcall(sunway_iommu_init); /******************************************************************************* * * DMA OPS Functions * ******************************************************************************/ struct sunway_iommu *get_first_iommu_from_domain(struct sunway_iommu_domain *sdomain) { struct sunway_iommu *iommu; struct sunway_iommu_dev *entry; entry = list_first_entry(&sdomain->dev_list, struct sunway_iommu_dev, list); iommu = entry->iommu; return iommu; } static unsigned long sunway_iommu_unmap_page(struct sunway_iommu_domain *sunway_domain, unsigned long iova, unsigned long page_size) { unsigned long *pte_l2, unmapped; pr_debug("%s iova %#lx, page_size %#lx\n", __func__, iova, page_size); BUG_ON(!is_power_of_2(page_size)); unmapped = 0; while (unmapped < page_size) { pte_l2 = (unsigned long *)fetch_pte(sunway_domain, iova, PTE_LEVEL2); *pte_l2 = 0; 
flush_pcache_by_addr(sunway_domain, (unsigned long)pte_l2); flush_ptlb_by_addr(sunway_domain, (iova >> PAGE_SHIFT)); iova += PAGE_SIZE; unmapped += PAGE_SIZE; } return unmapped; } int sunway_iommu_map_page(struct sunway_iommu_domain *sunway_domain, unsigned long bus_addr, unsigned long paddr, size_t page_size) { /* * pde: page table entry * pte: level 2 page table entry * pte_root: page table root */ struct page *page; struct sunway_iommu *iommu; unsigned long pde, pte, iova_pfn; unsigned long pdebaseaddr; u64 *ptebasecond, ptebaseaddr; u64 pte_root = (__pa(sunway_domain->pt_root) & PAGE_MASK); iova_pfn = (unsigned long)(bus_addr >> PAGE_SHIFT); pdebaseaddr = ((iova_pfn >> 10) & SW64_IOMMU_LEVEL1_OFFSET) << 3; pdebaseaddr += ((pte_root) & (~(SW64_IOMMU_ENTRY_VALID)) & (PAGE_MASK)) + PAGE_OFFSET; pde = *(unsigned long *)pdebaseaddr; if (pde) { ptebaseaddr = (pde & (~SW64_IOMMU_ENTRY_VALID) & PAGE_MASK) + PAGE_OFFSET; ptebaseaddr += (iova_pfn & SW64_IOMMU_LEVEL2_OFFSET) << 3; goto direct_map; } iommu = get_first_iommu_from_domain(sunway_domain); if (!iommu) return -1; page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); if (!page) { pr_err("Allocating pages failed.\n"); return -1; } ptebasecond = page_address(page); pde = (__pa(ptebasecond) & PAGE_MASK) | SW64_IOMMU_ENTRY_VALID; /* * If pde exists, no need to allocate a new page. * Atomic compare and exchange, compare the value the pointer points to * with 0UL. If identical, store pde where the pointer points to, return * 0UL. Otherwise, return the value the pointer points to. 
*/ if (cmpxchg64((volatile u64 *)pdebaseaddr, 0ULL, pde)) { ptebaseaddr = ((*(volatile u64 *)pdebaseaddr) & (~SW64_IOMMU_ENTRY_VALID) & PAGE_MASK) + PAGE_OFFSET; ptebaseaddr += (iova_pfn & SW64_IOMMU_LEVEL2_OFFSET) << 3; free_page((unsigned long)ptebasecond); } else { flush_pcache_by_addr(sunway_domain, pdebaseaddr); ptebaseaddr = (unsigned long)ptebasecond + ((iova_pfn & SW64_IOMMU_LEVEL2_OFFSET) << 3); } direct_map: /* case 8K */ if (page_size == (1UL << PAGE_SHIFT)) { if (*(volatile u64 *)ptebaseaddr) { pr_err("IOVA 4G overlap. IOVA is %#lx.\n", bus_addr); return -EFAULT; } pte = (paddr & PAGE_MASK) | SW64_IOMMU_ENTRY_VALID | SW64_IOMMU_GRN_8K | SW64_IOMMU_ENABLE; *(volatile u64 *)ptebaseaddr = pte; flush_pcache_by_addr(sunway_domain, ptebaseaddr); /* case 8M */ } else if (page_size == (1UL << PAGE_8M_SHIFT)) { unsigned long *ptr; int i, ptes_one_page, ptes_one_cache; ptr = (unsigned long *)ptebaseaddr; ptes_one_page = PAGE_SIZE/sizeof(pte); ptes_one_cache = L1_CACHE_BYTES/sizeof(pte); pte = (paddr & PAGE_MASK) | SW64_IOMMU_ENTRY_VALID | SW64_IOMMU_GRN_8M | SW64_IOMMU_ENABLE; for (i = 0; i < ptes_one_page; i++) { if (*ptr) { pr_err("IOVA 4G overlap. 
IOVA is %#lx.\n", bus_addr); return -EFAULT; } *ptr = pte; /* just do once flush per cache line */ if (i % ptes_one_cache == (ptes_one_cache - 1)) flush_pcache_by_addr(sunway_domain, (unsigned long)ptr); ptr++; } } #ifdef CONFIG_SW64_GUEST flush_ptlb_by_addr(sunway_domain, pfn | SW64_IOMMU_MAP_FLAG); #endif return 0; } /********************************************************************** * * IOMMU OPS Functions * **********************************************************************/ static struct iommu_domain *sunway_iommu_domain_alloc(unsigned int type) { struct sunway_iommu_domain *sdomain; struct dma_domain *dma_dom; switch (type) { case IOMMU_DOMAIN_UNMANAGED: sdomain = sunway_domain_alloc(); if (!sdomain) { pr_err("Allocating sunway_domain failed!\n"); return NULL; } sdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); if (!sdomain->pt_root) { pr_err("Allocating pt_root failed!\n"); sunway_domain_free(sdomain); return NULL; } sdomain->domain.geometry.aperture_start = 0ULL; sdomain->domain.geometry.aperture_end = (~0ULL); sdomain->domain.geometry.force_aperture = true; sdomain->type = IOMMU_DOMAIN_UNMANAGED; break; case IOMMU_DOMAIN_DMA: dma_dom = dma_domain_alloc(); if (!dma_dom) { pr_err("Failed to alloc dma domain!\n"); return NULL; } sdomain = &dma_dom->sdomain; break; case IOMMU_DOMAIN_IDENTITY: sdomain = sunway_domain_alloc(); if (!sdomain) return NULL; sdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); if (!sdomain->pt_root) { pr_err("Allocating pt_root failed!\n"); sunway_domain_free(sdomain); return NULL; } sdomain->type = IOMMU_DOMAIN_IDENTITY; iommu_identity_mapping = 1; break; default: return NULL; } return &sdomain->domain; } static void clean_domain(struct sunway_iommu_domain *sdomain) { struct sunway_iommu_dev *entry; unsigned long flags; spin_lock_irqsave(&sunway_iommu_device_table_lock, flags); while (!list_empty(&sdomain->dev_list)) { entry = list_first_entry(&sdomain->dev_list, struct sunway_iommu_dev, list); 
__detach_device(entry); } spin_unlock_irqrestore(&sunway_iommu_device_table_lock, flags); } static void sunway_iommu_domain_free(struct iommu_domain *dom) { struct sunway_iommu_domain *sdomain; struct dma_domain *dma_dom; sdomain = to_sunway_domain(dom); if (sdomain->dev_cnt > 0) clean_domain(sdomain); if (!dom) return; switch (dom->type) { case IOMMU_DOMAIN_DMA: dma_dom = to_dma_domain(sdomain); dma_domain_free(dma_dom); break; default: free_pagetable(sdomain); sunway_domain_free(sdomain); break; } } static int sunway_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); struct sunway_iommu_dev *sdev_data; struct pci_dev *pdev; struct pci_controller *hose; int ret; if (!dev_is_pci(dev)) return -ENODEV; pdev = to_pci_dev(dev); if (!pdev) return -EINVAL; hose = pci_bus_to_pci_controller(pdev->bus); if (!hose) return -EINVAL; if (!hose->iommu_enable) return -EINVAL; sdev_data = dev_iommu_priv_get(dev); if (!sdev_data) return -EINVAL; if (sdev_data->domain) detach_device(dev); ret = attach_device(dev, sdomain); return ret; } static phys_addr_t sunway_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); unsigned long paddr, grn; if (iova >= SW64_BAR_ADDRESS) return iova; paddr = fetch_pte(sdomain, iova, PTE_LEVEL2_VAL); if ((paddr & SW64_IOMMU_ENTRY_VALID) == 0) return 0; paddr &= ~SW64_IOMMU_ENTRY_VALID; grn = paddr & SW64_PTE_GRN_MASK; /* get page granularity */ paddr &= PAGE_MASK; switch (grn) { case SW64_IOMMU_GRN_8M: paddr += (iova & ~HPAGE_MASK); break; case SW64_IOMMU_GRN_8K: default: paddr += (iova & ~PAGE_MASK); break; } return paddr; } static int sunway_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, phys_addr_t paddr, size_t page_size, size_t pgcount, int iommu_prot, gfp_t gfp, size_t *mapped) { struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); size_t size = pgcount << PAGE_SHIFT; int ret; 
/* * As VFIO cannot distinguish between normal DMA request * and pci device BAR, check should be introduced manually * to avoid VFIO trying to map pci config space. */ if (iova >= SW64_BAR_ADDRESS) return 0; mutex_lock(&sdomain->api_lock); while (pgcount--) { ret = sunway_iommu_map_page(sdomain, iova, paddr, page_size); if (ret) { pr_info("Failed to map page from IOVA %lx.\n", iova); return ret; } iova += page_size; paddr += page_size; } mutex_unlock(&sdomain->api_lock); if (!ret && mapped) *mapped = size; return ret; } static size_t sunway_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova, size_t page_size, size_t pgcount, struct iommu_iotlb_gather *gather) { struct sunway_iommu_domain *sdomain = to_sunway_domain(dom); size_t unmap_size; size_t total_unmap = 0; if (iova >= SW64_BAR_ADDRESS) return page_size; mutex_lock(&sdomain->api_lock); while (pgcount--) { unmap_size = sunway_iommu_unmap_page(sdomain, iova, page_size); iova += page_size; total_unmap += page_size; } mutex_unlock(&sdomain->api_lock); return total_unmap; } static struct iommu_group *sunway_iommu_device_group(struct device *dev) { return generic_device_group(dev); } static int iommu_init_device(struct device *dev) { struct sunway_iommu_dev *sdev; struct sunway_iommu *iommu; struct pci_dev *pdev; struct pci_controller *hose; if (dev_iommu_priv_get(dev)) return 0; sdev = kzalloc(sizeof(struct sunway_iommu_dev), GFP_KERNEL); if (!sdev) return -ENOMEM; pdev = to_pci_dev(dev); hose = pci_bus_to_pci_controller(pdev->bus); iommu = hose->pci_iommu; llist_add(&sdev->dev_data_list, &dev_data_list); sdev->pdev = pdev; sdev->iommu = iommu; dev_iommu_priv_set(dev, sdev); return 0; } static void iommu_uninit_device(struct device *dev) { struct sunway_iommu_dev *sdev; sdev = dev_iommu_priv_get(dev); if (!sdev) return; if (sdev->domain) detach_device(dev); dev_iommu_priv_set(dev, NULL); } static void sunway_iommu_release_device(struct device *dev) { struct pci_dev *pdev; struct pci_controller *hose; 
pdev = to_pci_dev(dev); if (!pdev) return; hose = pci_bus_to_pci_controller(pdev->bus); if (!hose->iommu_enable) return; iommu_uninit_device(dev); } static struct iommu_device *sunway_iommu_probe_device(struct device *dev) { struct pci_dev *pdev; struct pci_controller *hose; struct sunway_iommu *iommu; int ret; if (!dev_is_pci(dev)) return ERR_PTR(-ENODEV); pdev = to_pci_dev(dev); if (!pdev) return ERR_PTR(-ENODEV); hose = pci_bus_to_pci_controller(pdev->bus); if (!hose) return ERR_PTR(-ENODEV); if (!hose->iommu_enable) return ERR_PTR(-ENODEV); if (dev_iommu_priv_get(dev)) return &iommu->iommu; ret = iommu_init_device(dev); if (ret) return ERR_PTR(ret); iommu = hose->pci_iommu; return &iommu->iommu; } static int sunway_iommu_def_domain_type(struct device *dev) { if (dev_is_pci(dev)) { if (iommu_identity_mapping) return IOMMU_DOMAIN_IDENTITY; } return 0; } static bool sunway_iommu_capable(struct device *dev, enum iommu_cap cap) { return false; } static void sunway_iommu_probe_finalize(struct device *dev) { set_dma_ops(dev, NULL); iommu_setup_dma_ops(dev, 0, SW64_DMA_LIMIT); } const struct iommu_ops sunway_iommu_ops = { .capable = sunway_iommu_capable, .domain_alloc = sunway_iommu_domain_alloc, .probe_device = sunway_iommu_probe_device, .probe_finalize = sunway_iommu_probe_finalize, .release_device = sunway_iommu_release_device, .device_group = sunway_iommu_device_group, .pgsize_bitmap = SW64_IOMMU_PGSIZES, .def_domain_type = sunway_iommu_def_domain_type, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sunway_iommu_attach_device, .map_pages = sunway_iommu_map_pages, .unmap_pages = sunway_iommu_unmap_pages, .iova_to_phys = sunway_iommu_iova_to_phys, .free = sunway_iommu_domain_free, } }; /***************************************************************************** * * Boot param handle * Each bit of iommu_enable bitmap represents an rc enable, and every 8 bits * represents one cpu node. 
For example, iommu_enable=0x0100 means enabling
 * rc0 for cpu node 1.
 *
 * The value is parsed as hexadecimal.  A parameter given without a value is
 * rejected with -EINVAL and leaves iommu_enable_cmd untouched.
 * NOTE(review): when parsing fails, rc_bitmap presumably still holds its
 * initial 0xffffffff and that mask is stored -- confirm this is intended.
 *
 *****************************************************************************/
static int __init iommu_enable_setup(char *str)
{
	int ret;
	unsigned long rc_bitmap = 0xffffffffUL;

	/* "iommu_enable" without "=value" hands the handler a NULL string */
	if (!str)
		return -EINVAL;

	ret = kstrtoul(str, 16, &rc_bitmap);
	iommu_enable_cmd = rc_bitmap;

	return ret;
}
early_param("iommu_enable", iommu_enable_setup);