1210 lines
28 KiB
C
1210 lines
28 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (C) 2021 Arm Ltd.
|
|
|
|
#define pr_fmt(fmt) "mpam: resctrl: " fmt
|
|
|
|
#include <linux/arm_mpam.h>
|
|
#include <linux/cacheinfo.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/limits.h>
|
|
#include <linux/list.h>
|
|
#include <linux/printk.h>
|
|
#include <linux/rculist.h>
|
|
#include <linux/resctrl.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/wait.h>
|
|
|
|
#include <asm/mpam.h>
|
|
|
|
#include "mpam_internal.h"
|
|
|
|
u64 mpam_resctrl_default_group;
|
|
|
|
DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);
|
|
|
|
/*
|
|
* The classes we've picked to map to resctrl resources.
|
|
* Class pointer may be NULL.
|
|
*/
|
|
static struct mpam_resctrl_res mpam_resctrl_exports[RDT_NUM_RESOURCES];
|
|
|
|
static bool exposed_alloc_capable;
|
|
static bool exposed_mon_capable;
|
|
static struct mpam_class *mbm_local_class;
|
|
static struct mpam_class *mbm_total_class;
|
|
|
|
/*
|
|
* MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM1_EL1.
|
|
* This applies globally to all traffic the CPU generates.
|
|
*/
|
|
static bool cdp_enabled;
|
|
|
|
/*
|
|
* If resctrl_init() succeeded, resctrl_exit() can be used to remove support
|
|
* for the filesystem in the event of an error.
|
|
*/
|
|
static bool resctrl_enabled;
|
|
|
|
/*
|
|
* mpam_resctrl_pick_caches() needs to know the size of the caches. cacheinfo
|
|
* populates this from a device_initcall(). mpam_resctrl_setup() must wait.
|
|
*/
|
|
static bool cacheinfo_ready;
|
|
static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);
|
|
|
|
/* A dummy mon context to use when the monitors were allocated up front */
|
|
u32 __mon_is_rmid_idx = USE_RMID_IDX;
|
|
void *mon_is_rmid_idx = &__mon_is_rmid_idx;
|
|
|
|
bool resctrl_arch_alloc_capable(void)
|
|
{
|
|
return exposed_alloc_capable;
|
|
}
|
|
|
|
bool resctrl_arch_mon_capable(void)
|
|
{
|
|
return exposed_mon_capable;
|
|
}
|
|
|
|
bool resctrl_arch_is_mbm_local_enabled(void)
|
|
{
|
|
return mbm_local_class;
|
|
}
|
|
|
|
bool resctrl_arch_is_mbm_total_enabled(void)
|
|
{
|
|
return mbm_total_class;
|
|
}
|
|
|
|
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
|
|
{
|
|
switch (rid) {
|
|
case RDT_RESOURCE_L2:
|
|
case RDT_RESOURCE_L3:
|
|
return cdp_enabled;
|
|
case RDT_RESOURCE_MBA:
|
|
default:
|
|
/*
|
|
* x86's MBA control doesn't support CDP, so user-space doesn't
|
|
* expect it.
|
|
*/
|
|
return false;
|
|
}
|
|
}
|
|
|
|
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable)
|
|
{
|
|
u64 regval;
|
|
u32 partid, partid_i, partid_d;
|
|
|
|
cdp_enabled = enable;
|
|
|
|
partid = RESCTRL_RESERVED_CLOSID;
|
|
|
|
if (enable) {
|
|
partid_d = resctrl_get_config_index(partid, CDP_CODE);
|
|
partid_i = resctrl_get_config_index(partid, CDP_DATA);
|
|
regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid_d) |
|
|
FIELD_PREP(MPAM_SYSREG_PARTID_I, partid_i);
|
|
|
|
} else {
|
|
regval = FIELD_PREP(MPAM_SYSREG_PARTID_D, partid) |
|
|
FIELD_PREP(MPAM_SYSREG_PARTID_I, partid);
|
|
}
|
|
|
|
WRITE_ONCE(mpam_resctrl_default_group, regval);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
|
|
{
|
|
return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
|
|
}
|
|
|
|
/*
|
|
* MSC may raise an error interrupt if it sees an out or range partid/pmg,
|
|
* and go on to truncate the value. Regardless of what the hardware supports,
|
|
* only the system wide safe value is safe to use.
|
|
*/
|
|
u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
|
|
{
|
|
return mpam_partid_max + 1;
|
|
}
|
|
|
|
u32 resctrl_arch_system_num_rmid_idx(void)
|
|
{
|
|
u8 closid_shift = fls(mpam_pmg_max);
|
|
u32 num_partid = resctrl_arch_get_num_closid(NULL);
|
|
|
|
return num_partid << closid_shift;
|
|
}
|
|
|
|
u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
|
|
{
|
|
u8 closid_shift = fls(mpam_pmg_max);
|
|
|
|
BUG_ON(closid_shift > 8);
|
|
|
|
return (closid << closid_shift) | rmid;
|
|
}
|
|
|
|
void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
|
|
{
|
|
u8 closid_shift = fls(mpam_pmg_max);
|
|
u32 pmg_mask = ~(~0 << closid_shift);
|
|
|
|
BUG_ON(closid_shift > 8);
|
|
|
|
*closid = idx >> closid_shift;
|
|
*rmid = idx & pmg_mask;
|
|
}
|
|
|
|
void resctrl_sched_in(struct task_struct *tsk)
|
|
{
|
|
lockdep_assert_preemption_disabled();
|
|
|
|
mpam_thread_switch(tsk);
|
|
}
|
|
|
|
void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 pmg)
|
|
{
|
|
BUG_ON(closid > U16_MAX);
|
|
BUG_ON(pmg > U8_MAX);
|
|
|
|
if (!cdp_enabled) {
|
|
mpam_set_cpu_defaults(cpu, closid, closid, pmg, pmg);
|
|
} else {
|
|
/*
|
|
* When CDP is enabled, resctrl halves the closid range and we
|
|
* use odd/even partid for one closid.
|
|
*/
|
|
u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
|
|
u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
|
|
|
|
mpam_set_cpu_defaults(cpu, partid_d, partid_i, pmg, pmg);
|
|
}
|
|
}
|
|
|
|
void resctrl_arch_sync_cpu_defaults(void *info)
|
|
{
|
|
struct resctrl_cpu_sync *r = info;
|
|
|
|
lockdep_assert_preemption_disabled();
|
|
|
|
if (r) {
|
|
resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
|
|
r->closid, r->rmid);
|
|
}
|
|
|
|
resctrl_sched_in(current);
|
|
}
|
|
|
|
void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
|
|
{
|
|
|
|
|
|
BUG_ON(closid > U16_MAX);
|
|
BUG_ON(rmid > U8_MAX);
|
|
|
|
if (!cdp_enabled) {
|
|
mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid);
|
|
} else {
|
|
u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
|
|
u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
|
|
|
|
mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
|
|
}
|
|
}
|
|
|
|
bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
|
|
{
|
|
u64 regval = mpam_get_regval(tsk);
|
|
u32 tsk_closid = FIELD_GET(MPAM_SYSREG_PARTID_D, regval);
|
|
|
|
if (cdp_enabled)
|
|
tsk_closid >>= 1;
|
|
|
|
return tsk_closid == closid;
|
|
}
|
|
|
|
/* The task's pmg is not unique, the partid must be considered too */
|
|
bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
|
|
{
|
|
u64 regval = mpam_get_regval(tsk);
|
|
u32 tsk_closid = FIELD_GET(MPAM_SYSREG_PARTID_D, regval);
|
|
u32 tsk_rmid = FIELD_GET(MPAM_SYSREG_PMG_D, regval);
|
|
|
|
if (cdp_enabled)
|
|
tsk_closid >>= 1;
|
|
|
|
return (tsk_closid == closid) && (tsk_rmid == rmid);
|
|
}
|
|
|
|
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
|
|
{
|
|
if (l >= RDT_NUM_RESOURCES)
|
|
return NULL;
|
|
|
|
return &mpam_resctrl_exports[l].resctrl_res;
|
|
}
|
|
|
|
static void *resctrl_arch_mon_ctx_alloc_no_wait(struct rdt_resource *r,
|
|
int evtid)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
u32 *ret = kmalloc(sizeof(*ret), GFP_KERNEL);
|
|
|
|
if (!ret)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
switch (evtid) {
|
|
case QOS_L3_OCCUP_EVENT_ID:
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
|
|
*ret = mpam_alloc_csu_mon(res->class);
|
|
return ret;
|
|
case QOS_L3_MBM_LOCAL_EVENT_ID:
|
|
case QOS_L3_MBM_TOTAL_EVENT_ID:
|
|
return mon_is_rmid_idx;
|
|
}
|
|
|
|
return ERR_PTR(-EOPNOTSUPP);
|
|
}
|
|
|
|
void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
|
|
{
|
|
DEFINE_WAIT(wait);
|
|
void *ret;
|
|
|
|
might_sleep();
|
|
|
|
do {
|
|
prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
|
|
TASK_INTERRUPTIBLE);
|
|
ret = resctrl_arch_mon_ctx_alloc_no_wait(r, evtid);
|
|
if (PTR_ERR(ret) == -ENOSPC)
|
|
schedule();
|
|
} while (PTR_ERR(ret) == -ENOSPC && !signal_pending(current));
|
|
finish_wait(&resctrl_mon_ctx_waiters, &wait);
|
|
|
|
return ret;
|
|
}
|
|
|
|
void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
|
|
void *arch_mon_ctx)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
u32 mon = *(u32 *)arch_mon_ctx;
|
|
|
|
if (mon == USE_RMID_IDX)
|
|
return;
|
|
kfree(arch_mon_ctx);
|
|
arch_mon_ctx = NULL;
|
|
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
|
|
switch (evtid) {
|
|
case QOS_L3_OCCUP_EVENT_ID:
|
|
mpam_free_csu_mon(res->class, mon);
|
|
wake_up(&resctrl_mon_ctx_waiters);
|
|
return;
|
|
case QOS_L3_MBM_TOTAL_EVENT_ID:
|
|
case QOS_L3_MBM_LOCAL_EVENT_ID:
|
|
return;
|
|
}
|
|
}
|
|
|
|
static enum mon_filter_options resctrl_evt_config_to_mpam(u32 local_evt_cfg)
|
|
{
|
|
switch (local_evt_cfg) {
|
|
case READS_TO_LOCAL_MEM:
|
|
return COUNT_READ;
|
|
case NON_TEMP_WRITE_TO_LOCAL_MEM:
|
|
return COUNT_WRITE;
|
|
default:
|
|
return COUNT_BOTH;
|
|
}
|
|
}
|
|
|
|
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
|
|
u32 closid, u32 rmid, enum resctrl_event_id eventid,
|
|
u64 *val, void *arch_mon_ctx)
|
|
{
|
|
int err;
|
|
u64 cdp_val;
|
|
struct mon_cfg cfg;
|
|
struct mpam_resctrl_dom *dom;
|
|
u32 mon = *(u32 *)arch_mon_ctx;
|
|
enum mpam_device_features type;
|
|
|
|
resctrl_arch_rmid_read_context_check();
|
|
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
|
|
switch (eventid) {
|
|
case QOS_L3_OCCUP_EVENT_ID:
|
|
type = mpam_feat_msmon_csu;
|
|
break;
|
|
case QOS_L3_MBM_LOCAL_EVENT_ID:
|
|
case QOS_L3_MBM_TOTAL_EVENT_ID:
|
|
type = mpam_feat_msmon_mbwu;
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
cfg.mon = mon;
|
|
if (cfg.mon == USE_RMID_IDX)
|
|
cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid);
|
|
|
|
cfg.match_pmg = true;
|
|
cfg.pmg = rmid;
|
|
cfg.opts = resctrl_evt_config_to_mpam(dom->mbm_local_evt_cfg);
|
|
|
|
if (cdp_enabled) {
|
|
cfg.partid = closid << 1;
|
|
err = mpam_msmon_read(dom->comp, &cfg, type, val);
|
|
if (err)
|
|
return err;
|
|
|
|
cfg.partid += 1;
|
|
err = mpam_msmon_read(dom->comp, &cfg, type, &cdp_val);
|
|
if (!err)
|
|
*val += cdp_val;
|
|
} else {
|
|
cfg.partid = closid;
|
|
err = mpam_msmon_read(dom->comp, &cfg, type, val);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
|
|
u32 closid, u32 rmid, enum resctrl_event_id eventid)
|
|
{
|
|
struct mon_cfg cfg;
|
|
struct mpam_resctrl_dom *dom;
|
|
|
|
if (eventid != QOS_L3_MBM_LOCAL_EVENT_ID)
|
|
return;
|
|
|
|
cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid);
|
|
cfg.match_pmg = true;
|
|
cfg.pmg = rmid;
|
|
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
|
|
if (cdp_enabled) {
|
|
cfg.partid = closid << 1;
|
|
mpam_msmon_reset_mbwu(dom->comp, &cfg);
|
|
|
|
cfg.partid += 1;
|
|
mpam_msmon_reset_mbwu(dom->comp, &cfg);
|
|
} else {
|
|
cfg.partid = closid;
|
|
mpam_msmon_reset_mbwu(dom->comp, &cfg);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The rmid realloc threshold should be for the smallest cache exposed to
|
|
* resctrl.
|
|
*/
|
|
static void update_rmid_limits(unsigned int size)
|
|
{
|
|
u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx();
|
|
|
|
if (WARN_ON_ONCE(!size))
|
|
return;
|
|
|
|
if (resctrl_rmid_realloc_limit && size > resctrl_rmid_realloc_limit)
|
|
return;
|
|
|
|
resctrl_rmid_realloc_limit = size;
|
|
resctrl_rmid_realloc_threshold = size / num_unique_pmg;
|
|
}
|
|
|
|
static bool cache_has_usable_cpor(struct mpam_class *class)
|
|
{
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
if (!mpam_has_feature(mpam_feat_cpor_part, cprops))
|
|
return false;
|
|
|
|
/* TODO: Scaling is not yet supported */
|
|
return (class->props.cpbm_wd <= RESCTRL_MAX_CBM);
|
|
}
|
|
|
|
static bool cache_has_usable_csu(struct mpam_class *class)
|
|
{
|
|
struct mpam_props *cprops;
|
|
|
|
if (!class)
|
|
return false;
|
|
|
|
cprops = &class->props;
|
|
|
|
if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
|
|
return false;
|
|
|
|
/*
|
|
* CSU counters settle on the value, so we can get away with
|
|
* having only one.
|
|
*/
|
|
if (!cprops->num_csu_mon)
|
|
return false;
|
|
|
|
return (mpam_partid_max > 1) || (mpam_pmg_max != 0);
|
|
}
|
|
|
|
bool resctrl_arch_is_llc_occupancy_enabled(void)
|
|
{
|
|
return cache_has_usable_csu(mpam_resctrl_exports[RDT_RESOURCE_L3].class);
|
|
}
|
|
|
|
static bool class_has_usable_mbwu(struct mpam_class *class)
|
|
{
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops))
|
|
return false;
|
|
|
|
/*
|
|
* resctrl expects the bandwidth counters to be free running,
|
|
* which means we need as many monitors as resctrl has
|
|
* control/monitor groups.
|
|
*/
|
|
if (cprops->num_mbwu_mon < resctrl_arch_system_num_rmid_idx())
|
|
return false;
|
|
|
|
return (mpam_partid_max > 1) || (mpam_pmg_max != 0);
|
|
}
|
|
|
|
static bool mba_class_use_mbw_part(struct mpam_props *cprops)
|
|
{
|
|
/* TODO: Scaling is not yet supported */
|
|
return (mpam_has_feature(mpam_feat_mbw_part, cprops) &&
|
|
cprops->mbw_pbm_bits < MAX_MBA_BW);
|
|
}
|
|
|
|
static bool class_has_usable_mba(struct mpam_props *cprops)
|
|
{
|
|
if (mba_class_use_mbw_part(cprops) ||
|
|
mpam_has_feature(mpam_feat_mbw_max, cprops))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Calculate the percentage change from each implemented bit in the control
|
|
* This can return 0 when BWA_WD is greater than 6. (100 / (1<<7) == 0)
|
|
*/
|
|
static u32 get_mba_granularity(struct mpam_props *cprops)
|
|
{
|
|
if (mba_class_use_mbw_part(cprops)) {
|
|
return MAX_MBA_BW / cprops->mbw_pbm_bits;
|
|
} else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
|
|
/*
|
|
* bwa_wd is the number of bits implemented in the 0.xxx
|
|
* fixed point fraction. 1 bit is 50%, 2 is 25% etc.
|
|
*/
|
|
return MAX_MBA_BW / (cprops->bwa_wd + 1);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static u32 mbw_pbm_to_percent(unsigned long mbw_pbm, struct mpam_props *cprops)
|
|
{
|
|
u32 bit, result = 0, granularity = get_mba_granularity(cprops);
|
|
|
|
for_each_set_bit(bit, &mbw_pbm, cprops->mbw_pbm_bits % 32) {
|
|
result += granularity;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
|
|
{
|
|
u8 bit;
|
|
u32 divisor = 2, value = 0;
|
|
|
|
for (bit = 15; bit; bit--) {
|
|
if (mbw_max & BIT(bit))
|
|
value += MAX_MBA_BW / divisor;
|
|
divisor <<= 1;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops)
|
|
{
|
|
u32 granularity = get_mba_granularity(cprops);
|
|
u8 num_bits = pc / granularity;
|
|
|
|
if (!num_bits)
|
|
return 0;
|
|
|
|
/* TODO: pick bits at random to avoid contention */
|
|
return (1 << num_bits) - 1;
|
|
}
|
|
|
|
static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
|
|
{
|
|
u8 bit;
|
|
u32 divisor = 2, value = 0;
|
|
|
|
if (WARN_ON_ONCE(cprops->bwa_wd > 15))
|
|
return MAX_MBA_BW;
|
|
|
|
for (bit = 15; bit; bit--) {
|
|
if (pc >= MAX_MBA_BW / divisor) {
|
|
pc -= MAX_MBA_BW / divisor;
|
|
value |= BIT(bit);
|
|
}
|
|
divisor <<= 1;
|
|
|
|
if (!pc || !(MAX_MBA_BW / divisor))
|
|
break;
|
|
}
|
|
|
|
value &= GENMASK(15, 15 - cprops->bwa_wd);
|
|
|
|
return value;
|
|
}
|
|
|
|
/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */
|
|
static void mpam_resctrl_pick_caches(void)
|
|
{
|
|
int idx;
|
|
unsigned int cache_size;
|
|
struct mpam_class *class;
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
idx = srcu_read_lock(&mpam_srcu);
|
|
list_for_each_entry_rcu(class, &mpam_classes, classes_list) {
|
|
struct mpam_props *cprops = &class->props;
|
|
bool has_cpor = cache_has_usable_cpor(class);
|
|
|
|
if (class->type != MPAM_CLASS_CACHE) {
|
|
pr_debug("pick_caches: Class is not a cache\n");
|
|
continue;
|
|
}
|
|
|
|
if (class->level != 2 && class->level != 3) {
|
|
pr_debug("pick_caches: not L2 or L3\n");
|
|
continue;
|
|
}
|
|
|
|
if (class->level == 2 && !has_cpor) {
|
|
pr_debug("pick_caches: L2 missing CPOR\n");
|
|
continue;
|
|
}
|
|
else if (!has_cpor && !cache_has_usable_csu(class)) {
|
|
pr_debug("pick_caches: Cache misses CPOR and CSU\n");
|
|
continue;
|
|
}
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
|
|
pr_debug("pick_caches: Class has missing CPUs\n");
|
|
continue;
|
|
}
|
|
|
|
/* Assume cache levels are the same size for all CPUs... */
|
|
cache_size = get_cpu_cacheinfo_size(smp_processor_id(), class->level);
|
|
if (!cache_size) {
|
|
pr_debug("pick_caches: Could not read cache size\n");
|
|
continue;
|
|
}
|
|
|
|
if (mpam_has_feature(mpam_feat_msmon_csu, cprops))
|
|
update_rmid_limits(cache_size);
|
|
|
|
if (class->level == 2) {
|
|
res = &mpam_resctrl_exports[RDT_RESOURCE_L2];
|
|
res->resctrl_res.name = "L2";
|
|
} else {
|
|
res = &mpam_resctrl_exports[RDT_RESOURCE_L3];
|
|
res->resctrl_res.name = "L3";
|
|
}
|
|
res->class = class;
|
|
}
|
|
srcu_read_unlock(&mpam_srcu, idx);
|
|
}
|
|
|
|
static void mpam_resctrl_pick_mba(void)
|
|
{
|
|
struct mpam_class *class, *candidate_class = NULL;
|
|
struct mpam_resctrl_res *res;
|
|
int idx;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
idx = srcu_read_lock(&mpam_srcu);
|
|
list_for_each_entry_rcu(class, &mpam_classes, classes_list) {
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
if (class->level < 3)
|
|
continue;
|
|
|
|
if (!class_has_usable_mba(cprops))
|
|
continue;
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask))
|
|
continue;
|
|
|
|
/*
|
|
* mba_sc reads the mbm_local counter, and waggles the MBA controls.
|
|
* mbm_local is implicitly part of the L3, pick a resouce to be MBA
|
|
* that as close as possible to the L3.
|
|
*/
|
|
if (!candidate_class || class->level < candidate_class->level)
|
|
candidate_class = class;
|
|
}
|
|
srcu_read_unlock(&mpam_srcu, idx);
|
|
|
|
if (candidate_class) {
|
|
res = &mpam_resctrl_exports[RDT_RESOURCE_MBA];
|
|
res->class = candidate_class;
|
|
res->resctrl_res.name = "MB";
|
|
}
|
|
}
|
|
|
|
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
|
|
{
|
|
struct mpam_props *cprops;
|
|
|
|
switch (evt) {
|
|
case QOS_L3_MBM_LOCAL_EVENT_ID:
|
|
if (!mbm_local_class)
|
|
return false;
|
|
cprops = &mbm_local_class->props;
|
|
|
|
return mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, cprops);
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
void resctrl_arch_mon_event_config_read(void *info)
|
|
{
|
|
struct mpam_resctrl_dom *dom;
|
|
struct resctrl_mon_config_info *mon_info = info;
|
|
|
|
dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_dom);
|
|
mon_info->mon_config = dom->mbm_local_evt_cfg & MAX_EVT_CONFIG_BITS;
|
|
}
|
|
|
|
void resctrl_arch_mon_event_config_write(void *info)
|
|
{
|
|
struct mpam_resctrl_dom *dom;
|
|
struct resctrl_mon_config_info *mon_info = info;
|
|
|
|
if (mon_info->mon_config & ~MPAM_RESTRL_EVT_CONFIG_VALID) {
|
|
mon_info->err = -EOPNOTSUPP;
|
|
return;
|
|
}
|
|
|
|
dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_dom);
|
|
dom->mbm_local_evt_cfg = mon_info->mon_config & MPAM_RESTRL_EVT_CONFIG_VALID;
|
|
}
|
|
|
|
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
|
|
{
|
|
struct mpam_resctrl_dom *dom;
|
|
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID;
|
|
mpam_msmon_reset_all_mbwu(dom->comp);
|
|
}
|
|
|
|
static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res)
|
|
{
|
|
struct mpam_class *class = res->class;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
bool has_mbwu = class_has_usable_mbwu(class);
|
|
|
|
/* Is this one of the two well-known caches? */
|
|
if (res->resctrl_res.rid == RDT_RESOURCE_L2 ||
|
|
res->resctrl_res.rid == RDT_RESOURCE_L3) {
|
|
bool has_csu = cache_has_usable_csu(class);
|
|
|
|
/* TODO: Scaling is not yet supported */
|
|
r->cache.cbm_len = class->props.cpbm_wd;
|
|
r->cache.arch_has_sparse_bitmasks = true;
|
|
|
|
/* mpam_devices will reject empty bitmaps */
|
|
r->cache.min_cbm_bits = 1;
|
|
|
|
/* TODO: kill these properties off as they are derivatives */
|
|
r->format_str = "%d=%0*x";
|
|
r->fflags = RFTYPE_RES_CACHE;
|
|
r->default_ctrl = BIT_MASK(class->props.cpbm_wd) - 1;
|
|
r->data_width = (class->props.cpbm_wd + 3) / 4;
|
|
|
|
/*
|
|
* Which bits are shared with other ...things...
|
|
* Unknown devices use partid-0 which uses all the bitmap
|
|
* fields. Until we configured the SMMU and GIC not to do this
|
|
* 'all the bits' is the correct answer here.
|
|
*/
|
|
r->cache.shareable_bits = r->default_ctrl;
|
|
|
|
if (mpam_has_feature(mpam_feat_cpor_part, &class->props)) {
|
|
r->alloc_capable = true;
|
|
exposed_alloc_capable = true;
|
|
}
|
|
|
|
/*
|
|
* MBWU counters may be 'local' or 'total' depending on where
|
|
* they are in the topology. Counters on caches are assumed to
|
|
* be local. If it's on the memory controller, its assumed to
|
|
* be global.
|
|
*/
|
|
if (has_mbwu && class->level >= 3) {
|
|
mbm_local_class = class;
|
|
r->mon_capable = true;
|
|
}
|
|
|
|
/*
|
|
* CSU counters only make sense on a cache. The file is called
|
|
* llc_occupancy, but its expected to the on the L3.
|
|
*/
|
|
if (has_csu && class->type == MPAM_CLASS_CACHE &&
|
|
class->level == 3) {
|
|
r->mon_capable = true;
|
|
}
|
|
} else if (res->resctrl_res.rid == RDT_RESOURCE_MBA) {
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
/* TODO: kill these properties off as they are derivatives */
|
|
r->format_str = "%d=%0*u";
|
|
r->fflags = RFTYPE_RES_MB;
|
|
r->default_ctrl = MAX_MBA_BW;
|
|
r->data_width = 3;
|
|
|
|
r->membw.delay_linear = true;
|
|
r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
|
|
r->membw.bw_gran = get_mba_granularity(cprops);
|
|
|
|
/* Round up to at least 1% */
|
|
if (!r->membw.bw_gran)
|
|
r->membw.bw_gran = 1;
|
|
|
|
if (class_has_usable_mba(cprops)) {
|
|
r->alloc_capable = true;
|
|
exposed_alloc_capable = true;
|
|
}
|
|
|
|
if (has_mbwu && class->type == MPAM_CLASS_MEMORY) {
|
|
mbm_total_class = class;
|
|
r->mon_capable = true;
|
|
}
|
|
}
|
|
|
|
if (r->mon_capable) {
|
|
exposed_mon_capable = true;
|
|
|
|
/*
|
|
* Unfortunately, num_rmid doesn't mean anything for
|
|
* mpam, and its exposed to user-space!
|
|
* num-rmid is supposed to mean the number of groups
|
|
* that can be created, both control or monitor groups.
|
|
* For mpam, each control group has its own pmg/rmid
|
|
* space.
|
|
*/
|
|
r->num_rmid = 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int mpam_resctrl_setup(void)
|
|
{
|
|
int err = 0;
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level i;
|
|
|
|
wait_event(wait_cacheinfo_ready, cacheinfo_ready);
|
|
|
|
cpus_read_lock();
|
|
for (i = 0; i < RDT_NUM_RESOURCES; i++) {
|
|
res = &mpam_resctrl_exports[i];
|
|
INIT_LIST_HEAD(&res->resctrl_res.domains);
|
|
INIT_LIST_HEAD(&res->resctrl_res.evt_list);
|
|
res->resctrl_res.rid = i;
|
|
}
|
|
|
|
mpam_resctrl_pick_caches();
|
|
mpam_resctrl_pick_mba();
|
|
/* TODO: mpam_resctrl_pick_counters(); */
|
|
|
|
for (i = 0; i < RDT_NUM_RESOURCES; i++) {
|
|
res = &mpam_resctrl_exports[i];
|
|
if (!res->class)
|
|
continue; // dummy resource
|
|
|
|
err = mpam_resctrl_resource_init(res);
|
|
if (err)
|
|
break;
|
|
}
|
|
cpus_read_unlock();
|
|
|
|
if (!err && !exposed_alloc_capable && !exposed_mon_capable)
|
|
err = -EOPNOTSUPP;
|
|
|
|
if (!err) {
|
|
if (!is_power_of_2(mpam_pmg_max + 1)) {
|
|
/*
|
|
* If not all the partid*pmg values are valid indexes,
|
|
* resctrl may allocate pmg that don't exist. This
|
|
* should cause an error interrupt.
|
|
*/
|
|
pr_warn("Number of PMG is not a power of 2! resctrl may misbehave");
|
|
}
|
|
|
|
err = resctrl_init();
|
|
if (!err)
|
|
WRITE_ONCE(resctrl_enabled, true);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
void mpam_resctrl_exit(void)
|
|
{
|
|
if (!READ_ONCE(resctrl_enabled))
|
|
return;
|
|
|
|
WRITE_ONCE(resctrl_enabled, false);
|
|
resctrl_exit();
|
|
}
|
|
|
|
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
|
|
u32 closid, enum resctrl_conf_type type)
|
|
{
|
|
u32 partid;
|
|
struct mpam_config *cfg;
|
|
struct mpam_props *cprops;
|
|
struct mpam_resctrl_res *res;
|
|
struct mpam_resctrl_dom *dom;
|
|
enum mpam_device_features configured_by;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!mpam_is_enabled())
|
|
return r->default_ctrl;
|
|
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
cprops = &res->class->props;
|
|
|
|
partid = resctrl_get_config_index(closid, type);
|
|
cfg = &dom->comp->cfg[partid];
|
|
|
|
switch (r->rid) {
|
|
case RDT_RESOURCE_L2:
|
|
case RDT_RESOURCE_L3:
|
|
configured_by = mpam_feat_cpor_part;
|
|
break;
|
|
case RDT_RESOURCE_MBA:
|
|
if (mba_class_use_mbw_part(cprops)) {
|
|
configured_by = mpam_feat_mbw_part;
|
|
break;
|
|
} else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
|
|
configured_by = mpam_feat_mbw_max;
|
|
break;
|
|
}
|
|
fallthrough;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) ||
|
|
!mpam_has_feature(configured_by, cfg))
|
|
return r->default_ctrl;
|
|
|
|
switch (configured_by) {
|
|
case mpam_feat_cpor_part:
|
|
/* TODO: Scaling is not yet supported */
|
|
return cfg->cpbm;
|
|
case mpam_feat_mbw_part:
|
|
/* TODO: Scaling is not yet supported */
|
|
return mbw_pbm_to_percent(cfg->mbw_pbm, cprops);
|
|
case mpam_feat_mbw_max:
|
|
return mbw_max_to_percent(cfg->mbw_max, cprops);
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
|
|
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
|
|
{
|
|
int err;
|
|
u32 partid;
|
|
struct mpam_config cfg;
|
|
struct mpam_props *cprops;
|
|
struct mpam_resctrl_res *res;
|
|
struct mpam_resctrl_dom *dom;
|
|
|
|
lockdep_assert_cpus_held();
|
|
lockdep_assert_irqs_enabled();
|
|
|
|
/* NOTE: don't check the CPU as mpam_apply_config() doesn't care,
|
|
* and resctrl_arch_update_domains() depends on this. */
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
cprops = &res->class->props;
|
|
|
|
partid = resctrl_get_config_index(closid, t);
|
|
if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r))
|
|
return -EINVAL;
|
|
|
|
switch (r->rid) {
|
|
case RDT_RESOURCE_L2:
|
|
case RDT_RESOURCE_L3:
|
|
/* TODO: Scaling is not yet supported */
|
|
cfg.cpbm = cfg_val;
|
|
mpam_set_feature(mpam_feat_cpor_part, &cfg);
|
|
break;
|
|
case RDT_RESOURCE_MBA:
|
|
if (mba_class_use_mbw_part(cprops)) {
|
|
cfg.mbw_pbm = percent_to_mbw_pbm(cfg_val, cprops);
|
|
mpam_set_feature(mpam_feat_mbw_part, &cfg);
|
|
break;
|
|
} else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
|
|
cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
|
|
mpam_set_feature(mpam_feat_mbw_max, &cfg);
|
|
break;
|
|
}
|
|
fallthrough;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* When CDP is enabled, but the resource doesn't support it, we need to
|
|
* apply the same configuration to the other partid.
|
|
*/
|
|
if (mpam_resctrl_hide_cdp(r->rid)) {
|
|
partid = resctrl_get_config_index(closid, CDP_CODE);
|
|
err = mpam_apply_config(dom->comp, partid, &cfg);
|
|
if (err)
|
|
return err;
|
|
|
|
partid = resctrl_get_config_index(closid, CDP_DATA);
|
|
return mpam_apply_config(dom->comp, partid, &cfg);
|
|
|
|
} else {
|
|
return mpam_apply_config(dom->comp, partid, &cfg);
|
|
}
|
|
}
|
|
|
|
/* TODO: this is IPI heavy */
|
|
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
|
|
{
|
|
int err = 0;
|
|
struct rdt_domain *d;
|
|
enum resctrl_conf_type t;
|
|
struct resctrl_staged_config *cfg;
|
|
|
|
lockdep_assert_cpus_held();
|
|
lockdep_assert_irqs_enabled();
|
|
|
|
list_for_each_entry(d, &r->domains, list) {
|
|
for (t = 0; t < CDP_NUM_TYPES; t++) {
|
|
cfg = &d->staged_config[t];
|
|
if (!cfg->have_new_ctrl)
|
|
continue;
|
|
|
|
err = resctrl_arch_update_one(r, d, closid, t,
|
|
cfg->new_ctrl);
|
|
if (err)
|
|
return err;
|
|
}
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
void resctrl_arch_reset_resources(void)
|
|
{
|
|
int i, idx;
|
|
struct mpam_class *class;
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!mpam_is_enabled())
|
|
return;
|
|
|
|
for (i = 0; i < RDT_NUM_RESOURCES; i++) {
|
|
res = &mpam_resctrl_exports[i];
|
|
|
|
if (!res->class)
|
|
continue; // dummy resource
|
|
|
|
if (!res->resctrl_res.alloc_capable)
|
|
continue;
|
|
|
|
idx = srcu_read_lock(&mpam_srcu);
|
|
list_for_each_entry_rcu(class, &mpam_classes, classes_list)
|
|
mpam_reset_class(class);
|
|
srcu_read_unlock(&mpam_srcu, idx);
|
|
}
|
|
}
|
|
|
|
static struct mpam_resctrl_dom *
|
|
mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
|
|
{
|
|
struct mpam_resctrl_dom *dom;
|
|
struct mpam_class *class = res->class;
|
|
struct mpam_component *comp_iter, *comp;
|
|
|
|
comp = NULL;
|
|
list_for_each_entry(comp_iter, &class->components, class_list) {
|
|
if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
|
|
comp = comp_iter;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* cpu with unknown exported component? */
|
|
if (WARN_ON_ONCE(!comp))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
|
|
if (!dom)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
dom->comp = comp;
|
|
INIT_LIST_HEAD(&dom->resctrl_dom.list);
|
|
dom->resctrl_dom.id = comp->comp_id;
|
|
dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID;
|
|
cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask);
|
|
|
|
/* TODO: this list should be sorted */
|
|
list_add_tail(&dom->resctrl_dom.list, &res->resctrl_res.domains);
|
|
|
|
return dom;
|
|
}
|
|
|
|
/* Like resctrl_get_domain_from_cpu(), but for offline CPUs */
|
|
static struct mpam_resctrl_dom *
|
|
mpam_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
|
|
{
|
|
struct rdt_domain *d;
|
|
struct mpam_resctrl_dom *dom;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
list_for_each_entry(d, &res->resctrl_res.domains, list) {
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
|
|
if (cpumask_test_cpu(cpu, &dom->comp->affinity))
|
|
return dom;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id)
|
|
{
|
|
struct rdt_domain *d;
|
|
struct mpam_resctrl_dom *dom;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
list_for_each_entry(d, &r->domains, list) {
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
if (dom->comp->comp_id == id)
|
|
return &dom->resctrl_dom;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int mpam_resctrl_online_cpu(unsigned int cpu)
|
|
{
|
|
int i, err;
|
|
struct mpam_resctrl_dom *dom;
|
|
struct mpam_resctrl_res *res;
|
|
|
|
for (i = 0; i < RDT_NUM_RESOURCES; i++) {
|
|
res = &mpam_resctrl_exports[i];
|
|
|
|
if (!res->class)
|
|
continue; // dummy_resource;
|
|
|
|
dom = mpam_get_domain_from_cpu(cpu, res);
|
|
if (dom) {
|
|
cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask);
|
|
continue;
|
|
}
|
|
|
|
dom = mpam_resctrl_alloc_domain(cpu, res);
|
|
if (IS_ERR(dom))
|
|
return PTR_ERR(dom);
|
|
err = resctrl_online_domain(&res->resctrl_res, &dom->resctrl_dom);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
resctrl_online_cpu(cpu);
|
|
return 0;
|
|
}
|
|
|
|
int mpam_resctrl_offline_cpu(unsigned int cpu)
|
|
{
|
|
int i;
|
|
struct rdt_domain *d;
|
|
struct mpam_resctrl_res *res;
|
|
struct mpam_resctrl_dom *dom;
|
|
|
|
resctrl_offline_cpu(cpu);
|
|
|
|
for (i = 0; i < RDT_NUM_RESOURCES; i++) {
|
|
res = &mpam_resctrl_exports[i];
|
|
|
|
if (!res->class)
|
|
continue; // dummy resource
|
|
|
|
d = resctrl_get_domain_from_cpu(cpu, &res->resctrl_res);
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);
|
|
|
|
/* The last one standing was ahead of us... */
|
|
if (WARN_ON_ONCE(!d))
|
|
continue;
|
|
|
|
cpumask_clear_cpu(cpu, &d->cpu_mask);
|
|
|
|
if (!cpumask_empty(&d->cpu_mask))
|
|
continue;
|
|
|
|
resctrl_offline_domain(&res->resctrl_res, &dom->resctrl_dom);
|
|
list_del(&d->list);
|
|
kfree(dom);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __init __cacheinfo_ready(void)
|
|
{
|
|
cacheinfo_ready = true;
|
|
wake_up(&wait_cacheinfo_ready);
|
|
|
|
return 0;
|
|
}
|
|
device_initcall_sync(__cacheinfo_ready);
|