308 lines
7.5 KiB
C
Raw Normal View History

2026-01-21 18:59:54 +08:00
// SPDX-License-Identifier: GPL-2.0-only
/*
* Support KVM guest page tracking
*
* This feature allows us to track page access in guest. Currently, only
* write access is tracked.
*
* Copyright(C) 2015 Intel Corporation.
*
* Author:
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
*/
2026-01-29 22:25:33 +08:00
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2026-01-21 18:59:54 +08:00
2026-01-29 22:25:33 +08:00
#include <linux/lockdep.h>
2026-01-21 18:59:54 +08:00
#include <linux/kvm_host.h>
#include <linux/rculist.h>
2026-01-29 22:25:33 +08:00
#include "mmu.h"
2026-01-21 18:59:54 +08:00
#include "mmu_internal.h"
2026-01-29 22:25:33 +08:00
#include "page_track.h"
2026-01-21 18:59:54 +08:00
2026-01-29 22:25:33 +08:00
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
2026-01-21 18:59:54 +08:00
{
2026-01-29 22:25:33 +08:00
return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
!tdp_enabled || kvm_shadow_root_allocated(kvm);
}
2026-01-21 18:59:54 +08:00
2026-01-29 22:25:33 +08:00
/*
 * Free the write-track array attached to @slot and clear the pointer so the
 * slot does not keep a reference to the released memory.
 */
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
	void *track = slot->arch.gfn_write_track;

	kvfree(track);
	slot->arch.gfn_write_track = NULL;
}
2026-01-29 22:25:33 +08:00
static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
unsigned long npages)
2026-01-21 18:59:54 +08:00
{
2026-01-29 22:25:33 +08:00
const size_t size = sizeof(*slot->arch.gfn_write_track);
2026-01-21 18:59:54 +08:00
2026-01-29 22:25:33 +08:00
if (!slot->arch.gfn_write_track)
slot->arch.gfn_write_track = __vcalloc(npages, size,
GFP_KERNEL_ACCOUNT);
2026-01-21 18:59:54 +08:00
2026-01-29 22:25:33 +08:00
return slot->arch.gfn_write_track ? 0 : -ENOMEM;
2026-01-21 18:59:54 +08:00
}
2026-01-29 22:25:33 +08:00
/*
 * Set up write tracking for a memslot of @npages pages.
 *
 * Nothing is allocated (and 0 is returned) when write tracking is not enabled
 * for @kvm. Otherwise returns the result of the array allocation: 0 on
 * success or -ENOMEM.
 */
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (kvm_page_track_write_tracking_enabled(kvm))
		return __kvm_page_track_write_tracking_alloc(slot, npages);

	return 0;
}
2026-01-29 22:25:33 +08:00
/*
 * Allocate the write-track array for @slot, sized from slot->npages, without
 * checking whether write tracking is enabled.
 *
 * Returns 0 on success (or if the array already exists), -ENOMEM on failure.
 */
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
{
	const unsigned long npages = slot->npages;

	return __kvm_page_track_write_tracking_alloc(slot, npages);
}
/*
 * Adjust the write-track count of @gfn in @slot by @count.
 *
 * If the adjusted count would be negative or exceed USHRT_MAX, warn once and
 * leave the stored count unchanged.
 */
static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
				   short count)
{
	const int idx = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
	const int val = slot->arch.gfn_write_track[idx];

	if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
		return;

	slot->arch.gfn_write_track[idx] += count;
}
2026-01-29 22:25:33 +08:00
/*
 * Start write-tracking @gfn in @slot.
 *
 * Lockdep asserts that the caller holds kvm->mmu_lock for write and either
 * kvm->slots_lock or a kvm->srcu read lock. Marks the VM as bugged and bails
 * if write tracking is not enabled for @kvm.
 */
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			       gfn_t gfn)
{
	bool need_flush;

	lockdep_assert_held_write(&kvm->mmu_lock);

	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
			    srcu_read_lock_held(&kvm->srcu));

	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
		return;

	update_gfn_write_track(slot, gfn, 1);

	/* A newly tracked page must not be mapped by a large page. */
	kvm_mmu_gfn_disallow_lpage(slot, gfn);

	/* Flush remote TLBs only if write-protecting the gfn asks for it. */
	need_flush = kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K);
	if (need_flush)
		kvm_flush_remote_tlbs(kvm);
}
2026-01-29 22:25:33 +08:00
/*
 * Stop write-tracking @gfn in @slot on behalf of one tracker.
 *
 * Lockdep asserts that the caller holds kvm->mmu_lock for write and either
 * kvm->slots_lock or a kvm->srcu read lock. Marks the VM as bugged and bails
 * if write tracking is not enabled for @kvm.
 */
void __kvm_write_track_remove_gfn(struct kvm *kvm,
				  struct kvm_memory_slot *slot, gfn_t gfn)
{
	lockdep_assert_held_write(&kvm->mmu_lock);

	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
			    srcu_read_lock_held(&kvm->srcu));

	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
		return;

	update_gfn_write_track(slot, gfn, -1);

	/* With this tracker gone, undo its large page restriction on the gfn. */
	kvm_mmu_gfn_allow_lpage(slot, gfn);
}
/*
* check if the corresponding access on the specified guest page is tracked.
*/
2026-01-29 22:25:33 +08:00
bool kvm_gfn_is_write_tracked(struct kvm *kvm,
const struct kvm_memory_slot *slot, gfn_t gfn)
2026-01-21 18:59:54 +08:00
{
int index;
2026-01-29 22:25:33 +08:00
if (!slot)
2026-01-21 18:59:54 +08:00
return false;
2026-01-29 22:25:33 +08:00
if (!kvm_page_track_write_tracking_enabled(kvm))
2026-01-21 18:59:54 +08:00
return false;
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
2026-01-29 22:25:33 +08:00
return !!READ_ONCE(slot->arch.gfn_write_track[index]);
2026-01-21 18:59:54 +08:00
}
2026-01-29 22:25:33 +08:00
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
2026-01-21 18:59:54 +08:00
/*
 * Tear down the SRCU state of @kvm's page-track notifier head.
 */
void kvm_page_track_cleanup(struct kvm *kvm)
{
	struct kvm_page_track_notifier_head *head = &kvm->arch.track_notifier_head;

	cleanup_srcu_struct(&head->track_srcu);
}
/*
 * Initialize @kvm's page-track notifier head: an empty notifier list plus
 * the SRCU state that protects it.
 *
 * Returns the result of init_srcu_struct().
 */
int kvm_page_track_init(struct kvm *kvm)
{
	struct kvm_page_track_notifier_head *head = &kvm->arch.track_notifier_head;

	INIT_HLIST_HEAD(&head->track_notifier_list);

	return init_srcu_struct(&head->track_srcu);
}
/*
* register the notifier so that event interception for the tracked guest
* pages can be received.
*/
2026-01-29 22:25:33 +08:00
int kvm_page_track_register_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n)
2026-01-21 18:59:54 +08:00
{
struct kvm_page_track_notifier_head *head;
2026-01-29 22:25:33 +08:00
if (!kvm || kvm->mm != current->mm)
return -ESRCH;
kvm_get_kvm(kvm);
2026-01-21 18:59:54 +08:00
head = &kvm->arch.track_notifier_head;
2026-01-29 22:25:33 +08:00
write_lock(&kvm->mmu_lock);
2026-01-21 18:59:54 +08:00
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
2026-01-29 22:25:33 +08:00
write_unlock(&kvm->mmu_lock);
return 0;
2026-01-21 18:59:54 +08:00
}
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
/*
* stop receiving the event interception. It is the opposed operation of
* kvm_page_track_register_notifier().
*/
2026-01-29 22:25:33 +08:00
void kvm_page_track_unregister_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n)
2026-01-21 18:59:54 +08:00
{
struct kvm_page_track_notifier_head *head;
head = &kvm->arch.track_notifier_head;
2026-01-29 22:25:33 +08:00
write_lock(&kvm->mmu_lock);
2026-01-21 18:59:54 +08:00
hlist_del_rcu(&n->node);
2026-01-29 22:25:33 +08:00
write_unlock(&kvm->mmu_lock);
2026-01-21 18:59:54 +08:00
synchronize_srcu(&head->track_srcu);
2026-01-29 22:25:33 +08:00
kvm_put_kvm(kvm);
2026-01-21 18:59:54 +08:00
}
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
/*
* Notify the node that write access is intercepted and write emulation is
* finished at this time.
*
* The node should figure out if the written page is the one that node is
* interested in by itself.
*/
2026-01-29 22:25:33 +08:00
void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
2026-01-21 18:59:54 +08:00
{
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
int idx;
2026-01-29 22:25:33 +08:00
head = &kvm->arch.track_notifier_head;
2026-01-21 18:59:54 +08:00
if (hlist_empty(&head->track_notifier_list))
return;
idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
2026-01-29 22:25:33 +08:00
srcu_read_lock_held(&head->track_srcu))
2026-01-21 18:59:54 +08:00
if (n->track_write)
2026-01-29 22:25:33 +08:00
n->track_write(gpa, new, bytes, n);
2026-01-21 18:59:54 +08:00
srcu_read_unlock(&head->track_srcu, idx);
}
/*
2026-01-29 22:25:33 +08:00
* Notify external page track nodes that a memory region is being removed from
* the VM, e.g. so that users can free any associated metadata.
2026-01-21 18:59:54 +08:00
*/
2026-01-29 22:25:33 +08:00
void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
2026-01-21 18:59:54 +08:00
{
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
int idx;
head = &kvm->arch.track_notifier_head;
if (hlist_empty(&head->track_notifier_list))
return;
idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
2026-01-29 22:25:33 +08:00
srcu_read_lock_held(&head->track_srcu))
if (n->track_remove_region)
n->track_remove_region(slot->base_gfn, slot->npages, n);
2026-01-21 18:59:54 +08:00
srcu_read_unlock(&head->track_srcu, idx);
}
2026-01-29 22:25:33 +08:00
/*
 * Add a guest page to the tracking pool so that write access to that page
 * is intercepted.
 *
 * @kvm: the guest instance we are interested in.
 * @gfn: the guest page.
 *
 * The memslot lookup and the update run inside a kvm->srcu read-side
 * section; the update itself is done with kvm->mmu_lock held for write.
 *
 * Returns 0 on success, -EINVAL if no memslot is found for @gfn.
 */
int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;
	int ret = -EINVAL;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);

	slot = gfn_to_memslot(kvm, gfn);
	if (!slot)
		goto out_unlock;

	write_lock(&kvm->mmu_lock);
	__kvm_write_track_add_gfn(kvm, slot, gfn);
	write_unlock(&kvm->mmu_lock);
	ret = 0;

out_unlock:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}
EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
/*
 * Remove a guest page from the tracking pool, which stops the interception
 * of write access to that page on behalf of the caller.
 *
 * @kvm: the guest instance we are interested in.
 * @gfn: the guest page.
 *
 * The memslot lookup and the update run inside a kvm->srcu read-side
 * section; the update itself is done with kvm->mmu_lock held for write.
 *
 * Returns 0 on success, -EINVAL if no memslot is found for @gfn.
 */
int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;
	int ret = -EINVAL;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);

	slot = gfn_to_memslot(kvm, gfn);
	if (!slot)
		goto out_unlock;

	write_lock(&kvm->mmu_lock);
	__kvm_write_track_remove_gfn(kvm, slot, gfn);
	write_unlock(&kvm->mmu_lock);
	ret = 0;

out_unlock:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}
EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
#endif