2026-01-21 18:59:54 +08:00

2412 lines
58 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/*
* Common code for ARM v8 MPAM
*
* Copyright (C) 2018-2019 Huawei Technologies Co., Ltd
*
* Author: Xie XiuQi <xiexiuqi@huawei.com>
*
* Code was partially borrowed from arch/x86/kernel/cpu/intel_rdt*.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* More information about MPAM can be found in the Arm Architecture Reference
* Manual.
*
* https://static.docs.arm.com/ddi0598/a/DDI0598_MPAM_supp_armv8a.pdf
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>
#include <linux/task_work.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/arm_mpam.h>
#include <asm/mpam_sched.h>
#include <asm/mpam.h>
#include <asm/io.h>
#include "mpam_device.h"
#include "mpam_resource.h"
#include "mpam_internal.h"
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(resctrl_group_mutex);
/*
 * The cached intel_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
/*
 * Widest resource name and widest resource data seen so far; used to
 * display the schemata in a tabular format.
 */
int max_name_width, max_data_width;
/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;
/*
 * True once CDP has been turned on for a resource (set by
 * try_to_enable_cdp(), cleared by disable_cdp()).
 */
static bool resctrl_cdp_enabled;
/*
* Hi1620 2P Base Address Map
*
* AFF2 | NODE | DIE | Base Address
* ------------------------------------
* 01 | 0 | P0 TB | 0x000098xxxxxx
* 03 | 1 | P0 TA | 0x000090xxxxxx
* 05 | 2 | P1 TB | 0x200098xxxxxx
* 07 | 3 | P2 TA | 0x200090xxxxxx
*
* AFF2: MPIDR.AFF2
*/
/*
 * Put @cpu into the default resctrl group's CPU mask and, when CDP is
 * enabled, refresh the CDP state of the default group's CPUs.
 * Always returns 0.
 */
int mpam_resctrl_set_default_cpu(unsigned int cpu)
{
	struct cpumask *default_mask = &resctrl_group_default.cpu_mask;

	/* A CPU belongs to the default group once it is online. */
	cpumask_set_cpu(cpu, default_mask);

	if (!resctrl_cdp_enabled)
		return 0;

	/* Update CPU mpam sysregs' default setting when cdp enabled */
	resctrl_cdp_update_cpus_state(&resctrl_group_default);

	return 0;
}
void mpam_resctrl_clear_default_cpu(unsigned int cpu)
{
struct resctrl_group *rdtgrp;
list_for_each_entry(rdtgrp, &resctrl_all_groups, resctrl_group_list) {
/* The cpu is clear in associated rdtgroup after offline. */
cpumask_clear_cpu(cpu, &rdtgrp->cpu_mask);
}
}
/* Report whether CDP is currently enabled. */
bool is_resctrl_cdp_enabled(void)
{
	return resctrl_cdp_enabled;
}
/*
 * Forward declarations of the callbacks wired into
 * raw_resctrl_resources_all[] below, plus the helper used by
 * common_wrmsr().
 */
static void
mpam_resctrl_update_component_cfg(struct resctrl_resource *r,
struct rdt_domain *d, struct sd_closid *closid);
/* Configuration write-back shared by all resources (.msr_update). */
static void
common_wrmsr(struct resctrl_resource *r, struct rdt_domain *d,
struct msr_param *para);
/* Configuration read-back for cache and memory-bandwidth (.msr_read). */
static u64 cache_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
struct msr_param *para);
static u64 mbw_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
struct msr_param *para);
/* Monitor read (.mon_read) and monitor setup (.mon_write). */
static u64 cache_rdmon(struct rdt_domain *d, void *md_priv);
static u64 mbw_rdmon(struct rdt_domain *d, void *md_priv);
static int common_wrmon(struct rdt_domain *d, void *md_priv);
/* Schemata value parsers (.parse_ctrlval). */
static int parse_cache(char *buf, struct resctrl_resource *r,
struct resctrl_staged_config *cfg, enum resctrl_ctrl_type ctrl_type);
static int parse_bw(char *buf, struct resctrl_resource *r,
struct resctrl_staged_config *cfg, enum resctrl_ctrl_type ctrl_type);
/*
 * Per-resource description table, indexed by enum resctrl_resource_level.
 *
 * Each entry wires up the read/write/parse/monitor callbacks for one
 * resource and lists its control features, indexed by control type
 * (SCHEMA_*). For every feature: @base is the radix handed to kstrtoul()
 * by the parsers, @evt is the event id placed in sync_args.eventid when
 * the feature is read back, and @name is the string matched by
 * extend_ctrl_enable(). Only the SCHEMA_COMM features are marked capable
 * in this initializer.
 */
struct raw_resctrl_resource raw_resctrl_resources_all[] = {
/* L3 cache */
[RDT_RESOURCE_L3] = {
.msr_update = common_wrmsr,
.msr_read = cache_rdmsr,
.parse_ctrlval = parse_cache,
.format_str = "%d=%0*x",
.mon_read = cache_rdmon,
.mon_write = common_wrmon,
.fflags = RFTYPE_RES_CACHE,
.ctrl_features = {
[SCHEMA_COMM] = {
.type = SCHEMA_COMM,
.flags = SCHEMA_COMM,
.name = "comm",
.base = 16,
.evt = QOS_CAT_CPBM_EVENT_ID,
.capable = 1,
.ctrl_suffix = "",
},
[SCHEMA_PRI] = {
.type = SCHEMA_PRI,
.flags = SCHEMA_PRI,
.name = "caPrio",
.base = 10,
.evt = QOS_CAT_INTPRI_EVENT_ID,
.ctrl_suffix = "PRI",
},
[SCHEMA_PBM] = {
.type = SCHEMA_PBM,
.flags = SCHEMA_COMM,
.name = "caPbm",
.base = 16,
.evt = QOS_CAT_CPBM_EVENT_ID,
.ctrl_suffix = "PBM",
},
[SCHEMA_MAX] = {
.type = SCHEMA_MAX,
.flags = SCHEMA_COMM,
.name = "caMax",
.base = 10,
.evt = QOS_CAT_CMAX_EVENT_ID,
.ctrl_suffix = "MAX",
},
},
},
/* L2 cache: same layout as the L3 entry */
[RDT_RESOURCE_L2] = {
.msr_update = common_wrmsr,
.msr_read = cache_rdmsr,
.parse_ctrlval = parse_cache,
.format_str = "%d=%0*x",
.mon_read = cache_rdmon,
.mon_write = common_wrmon,
.fflags = RFTYPE_RES_CACHE,
.ctrl_features = {
[SCHEMA_COMM] = {
.type = SCHEMA_COMM,
.flags = SCHEMA_COMM,
.name = "comm",
.base = 16,
.evt = QOS_CAT_CPBM_EVENT_ID,
.capable = 1,
.ctrl_suffix = "",
},
[SCHEMA_PRI] = {
.type = SCHEMA_PRI,
.flags = SCHEMA_PRI,
.name = "caPrio",
.base = 10,
.evt = QOS_CAT_INTPRI_EVENT_ID,
.ctrl_suffix = "PRI",
},
[SCHEMA_PBM] = {
.type = SCHEMA_PBM,
.flags = SCHEMA_COMM,
.name = "caPbm",
.base = 16,
.evt = QOS_CAT_CPBM_EVENT_ID,
.ctrl_suffix = "PBM",
},
[SCHEMA_MAX] = {
.type = SCHEMA_MAX,
.flags = SCHEMA_COMM,
.name = "caMax",
.base = 10,
.evt = QOS_CAT_CMAX_EVENT_ID,
.ctrl_suffix = "MAX",
},
},
},
/* Memory bandwidth: decimal format, bandwidth-specific callbacks */
[RDT_RESOURCE_MC] = {
.msr_update = common_wrmsr,
.msr_read = mbw_rdmsr,
.parse_ctrlval = parse_bw,
.format_str = "%d=%0*d",
.mon_read = mbw_rdmon,
.mon_write = common_wrmon,
.fflags = RFTYPE_RES_MB,
.ctrl_features = {
[SCHEMA_COMM] = {
.type = SCHEMA_COMM,
.flags = SCHEMA_COMM,
.name = "comm",
.base = 10,
.evt = QOS_MBA_MAX_EVENT_ID,
.capable = 1,
.ctrl_suffix = "",
},
[SCHEMA_PRI] = {
.type = SCHEMA_PRI,
.flags = SCHEMA_PRI,
.name = "mbPrio",
.base = 10,
.evt = QOS_MBA_INTPRI_EVENT_ID,
.ctrl_suffix = "PRI",
},
[SCHEMA_HDL] = {
.type = SCHEMA_HDL,
.flags = SCHEMA_HDL,
.name = "mbHdl",
.base = 10,
.evt = QOS_MBA_HDL_EVENT_ID,
.ctrl_suffix = "HDL",
},
[SCHEMA_PBM] = {
.type = SCHEMA_PBM,
.flags = SCHEMA_COMM,
.name = "mbPbm",
.base = 16,
.evt = QOS_MBA_PBM_EVENT_ID,
.ctrl_suffix = "PBM",
},
[SCHEMA_MAX] = {
.type = SCHEMA_MAX,
.flags = SCHEMA_COMM,
.name = "mbMax",
.base = 10,
.evt = QOS_MBA_MAX_EVENT_ID,
.ctrl_suffix = "MAX",
},
[SCHEMA_MIN] = {
.type = SCHEMA_MIN,
.flags = SCHEMA_COMM,
.name = "mbMin",
.base = 10,
.evt = QOS_MBA_MIN_EVENT_ID,
.ctrl_suffix = "MIN",
},
},
},
};
/*
 * Look up the raw resource descriptor for @level.
 * Returns NULL when @level is not below RDT_NUM_RESOURCES.
 */
struct raw_resctrl_resource *
mpam_get_raw_resctrl_resource(enum resctrl_resource_level level)
{
	return (level < RDT_NUM_RESOURCES) ?
		&raw_resctrl_resources_all[level] : NULL;
}
/*
 * Parse one cache schemata value from @buf into the staged config @cfg.
 *
 * The radix comes from the control feature selected by @type. Returns
 * -EINVAL if the domain already has a staged value, if @buf is not a
 * valid number, or if the value is not below the feature's max_wd;
 * returns 0 on success.
 */
static int
parse_cache(char *buf, struct resctrl_resource *r,
	    struct resctrl_staged_config *cfg,
	    enum resctrl_ctrl_type type)
{
	struct raw_resctrl_resource *rr = r->res;
	unsigned long val;

	if (cfg->have_new_ctrl) {
		rdt_last_cmd_printf("duplicate domain\n");
		return -EINVAL;
	}

	if (kstrtoul(buf, rr->ctrl_features[type].base, &val) ||
	    val >= rr->ctrl_features[type].max_wd)
		return -EINVAL;

	cfg->have_new_ctrl = true;
	cfg->new_ctrl[type] = val;

	return 0;
}
/*
 * Parse one memory-bandwidth schemata value from @buf into @cfg.
 *
 * Values for the MAX and PBM events are raised to r->mbw.min_bw and then
 * rounded up to the bandwidth granularity; values for the MIN event are
 * only rounded up (0 is allowed); any other feature is taken verbatim.
 * Returns -EINVAL on a duplicate domain, an unparsable number or a value
 * not below the feature's max_wd; returns 0 on success.
 */
static int
parse_bw(char *buf, struct resctrl_resource *r,
	 struct resctrl_staged_config *cfg,
	 enum resctrl_ctrl_type type)
{
	struct raw_resctrl_resource *rr = r->res;
	unsigned long val;

	if (cfg->have_new_ctrl) {
		rdt_last_cmd_printf("duplicate domain\n");
		return -EINVAL;
	}

	/* Every feature is parsed the same way; only post-processing differs. */
	if (kstrtoul(buf, rr->ctrl_features[type].base, &val))
		return -EINVAL;

	switch (rr->ctrl_features[type].evt) {
	case QOS_MBA_MAX_EVENT_ID:
	case QOS_MBA_PBM_EVENT_ID:
		if (val < r->mbw.min_bw)
			val = r->mbw.min_bw;
		val = roundup(val, r->mbw.bw_gran);
		break;
	case QOS_MBA_MIN_EVENT_ID:
		/* for mbw min feature, 0 of setting is allowed */
		val = roundup(val, r->mbw.bw_gran);
		break;
	default:
		break;
	}

	if (val >= rr->ctrl_features[type].max_wd)
		return -EINVAL;

	cfg->have_new_ctrl = true;
	cfg->new_ctrl[type] = val;

	return 0;
}
/*
 * Apply the configuration of @para->closid to the component behind @d.
 *
 * The component configuration is updated first via
 * mpam_resctrl_update_component_cfg(), then pushed out with
 * mpam_component_config(). Only args.closid is filled in here.
 */
static void
common_wrmsr(struct resctrl_resource *r, struct rdt_domain *d,
	     struct msr_param *para)
{
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	struct sync_args args;

	mpam_resctrl_update_component_cfg(r, d, para->closid);

	/*
	 * The configuration has been replicated at this point, so it is
	 * ready to be applied.
	 */
	args.closid = *para->closid;
	mpam_component_config(dom->comp, &args);
}
/*
 * Read back the cache control value selected by @para->type for
 * @para->closid from the component behind @d.
 */
static u64 cache_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
		       struct msr_param *para)
{
	struct raw_resctrl_resource *rr = r->res;
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	struct sync_args args;
	u32 val;

	args.closid = *para->closid;
	args.eventid = rr->ctrl_features[para->type].evt;

	mpam_component_get_config(dom->comp, &args, &val);

	return val;
}
/*
 * Read back the memory-bandwidth control value selected by @para->type
 * for @para->closid. Values for the MAX, MIN and PBM events are rounded
 * up to the bandwidth granularity before being returned.
 */
static u64 mbw_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
		     struct msr_param *para)
{
	struct raw_resctrl_resource *rr = r->res;
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	struct sync_args args;
	u32 val;

	args.closid = *para->closid;
	args.eventid = rr->ctrl_features[para->type].evt;

	mpam_component_get_config(dom->comp, &args, &val);

	switch (rr->ctrl_features[para->type].evt) {
	case QOS_MBA_MAX_EVENT_ID:
	case QOS_MBA_MIN_EVENT_ID:
	case QOS_MBA_PBM_EVENT_ID:
		val = roundup(val, r->mbw.bw_gran);
		break;
	default:
		break;
	}

	return val;
}
/*
 * Read the cache occupancy monitor described by @md_priv on domain @d.
 *
 * pmg is used as the monitor id and only the reqpartid part of the
 * closid is filled in. The read is retried while mpam_component_mon()
 * returns -EBUSY, for at most 1*SEC_CONVERSION jiffies.
 *
 * Returns the monitor value, or 0 if no value could be obtained (timeout
 * or error) and mpam_component_mon() did not store anything. The error
 * code itself cannot be reported through the u64 return value.
 */
static u64 cache_rdmon(struct rdt_domain *d, void *md_priv)
{
	int err;
	/* Never hand uninitialized stack contents back to the caller. */
	u64 result = 0;
	union mon_data_bits md;
	struct sync_args args;
	struct mpam_resctrl_dom *dom;
	unsigned long timeout;

	md.priv = md_priv;

	/* monitoring only need reqpartid */
	args.closid.reqpartid = md.u.partid;
	args.mon = md.u.mon;
	args.pmg = md.u.pmg;
	args.match_pmg = true;
	args.eventid = QOS_L3_OCCUP_EVENT_ID;

	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);

	/*
	 * The read may be affected by the NRDY bit, so retry on -EBUSY
	 * until the deadline passes.
	 */
	timeout = READ_ONCE(jiffies) + (1*SEC_CONVERSION);
	do {
		if (time_after(READ_ONCE(jiffies), timeout)) {
			err = -ETIMEDOUT;
			break;
		}
		err = mpam_component_mon(dom->comp, &args, &result);
		/* Currently just report it */
		WARN_ON(err && (err != -EBUSY));
	} while (err == -EBUSY);

	return result;
}
/*
 * Read the memory-bandwidth monitor described by @md_priv on domain @d.
 *
 * pmg is used as the monitor id and only the reqpartid part of the
 * closid is filled in. The read is retried while mpam_component_mon()
 * returns -EBUSY, for at most 1*SEC_CONVERSION jiffies.
 *
 * Returns the monitor value, or 0 if no value could be obtained (timeout
 * or error) and mpam_component_mon() did not store anything. The error
 * code itself cannot be reported through the u64 return value.
 */
static u64 mbw_rdmon(struct rdt_domain *d, void *md_priv)
{
	int err;
	/* Never hand uninitialized stack contents back to the caller. */
	u64 result = 0;
	union mon_data_bits md;
	struct sync_args args;
	struct mpam_resctrl_dom *dom;
	unsigned long timeout;

	md.priv = md_priv;

	/* monitoring only need reqpartid */
	args.closid.reqpartid = md.u.partid;
	args.mon = md.u.mon;
	args.pmg = md.u.pmg;
	args.match_pmg = true;
	args.eventid = QOS_L3_MBM_LOCAL_EVENT_ID;

	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);

	/*
	 * The read may be affected by the NRDY bit, so retry on -EBUSY
	 * until the deadline passes.
	 */
	timeout = READ_ONCE(jiffies) + (1*SEC_CONVERSION);
	do {
		if (time_after(READ_ONCE(jiffies), timeout)) {
			err = -ETIMEDOUT;
			break;
		}
		err = mpam_component_mon(dom->comp, &args, &result);
		/* Currently just report it */
		WARN_ON(err && (err != -EBUSY));
	} while (err == -EBUSY);

	return result;
}
/*
 * Configure the monitor described by @md_priv on domain @d.
 *
 * The value read by mpam_component_mon() and its return code are
 * deliberately ignored: the call is made only to program the monitor.
 * Always returns 0.
 */
static int
common_wrmon(struct rdt_domain *d, void *md_priv)
{
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	union mon_data_bits md;
	struct sync_args args;
	u64 unused;

	md.priv = md_priv;

	/* monitoring only need reqpartid */
	args.closid.reqpartid = md.u.partid;
	args.mon = md.u.mon;
	args.pmg = md.u.pmg;
	args.match_pmg = true;

	mpam_component_mon(dom->comp, &args, &unused);

	return 0;
}
/*
* Note that resctrl_id_init() should be called after
* parse_resctrl_group_fs_options(), so that the CDP setting
* (resctrl_cdp_enabled) is already in effect.
*
* Using a global CLOSID across all resources has some advantages and
* some drawbacks:
* + We can simply set "current->closid" to assign a task to a resource
* group.
* + Context switch code can avoid extra memory references deciding which
* CLOSID to load into the PQR_ASSOC MSR
* - We give up some options in configuring resource groups across multi-socket
* systems.
* - Our choices on how to configure each resource become progressively more
* limited as the number of resources grows.
*/
/*
 * num_reqpartid and num_intpartid are computed by
 * mpam_resctrl_closid_collect(). intpartid_free_map is the closid
 * allocation bitmap built by closid_bitmap_init(): a set bit means the
 * closid is free.
 */
static int num_intpartid, num_reqpartid;
static unsigned long *intpartid_free_map;
/*
 * Work out how many reqpartids and intpartids (closids) are usable.
 *
 * num_reqpartid is the system-wide partid count. num_intpartid is that
 * count capped by RESCTRL_MAX_CLOSID, then further capped by the
 * smallest non-zero num_intpartid reported by any exported resource.
 */
static void mpam_resctrl_closid_collect(void)
{
	struct mpam_resctrl_res *res;
	struct raw_resctrl_resource *rr;

	/*
	 * num_reqpartid refers to the maximum partid number
	 * that system width provides.
	 */
	num_reqpartid = mpam_sysprops_num_partid();

	/*
	 * intpartid is used as the closid: when the platform supports
	 * intpartid narrowing, it bounds the number of resctrl groups
	 * that can be created, so it must not exceed either the
	 * reqpartid count or the closid limit of the resctrl interface.
	 * (The former extra read of mpam_sysprops_num_partid() into
	 * num_intpartid was a dead store and has been dropped.)
	 */
	num_intpartid = min(num_reqpartid, RESCTRL_MAX_CLOSID);

	/*
	 * Since intpartid is the closid given to resctrl, honor any
	 * resource that supports intpartid narrowing.
	 */
	for_each_supported_resctrl_exports(res) {
		rr = res->resctrl_res.res;

		if (!rr->num_intpartid)
			continue;

		num_intpartid = min(num_intpartid, (int)rr->num_intpartid);
	}
}
/*
 * Number of closids available, or 0 while the closid free map has not
 * been allocated.
 */
static u32 get_nr_closid(void)
{
	return intpartid_free_map ? num_intpartid : 0;
}
int closid_bitmap_init(void)
{
int pos;
u32 times, flag;
u32 bits_num;
mpam_resctrl_closid_collect();
bits_num = num_intpartid;
hw_alloc_times_validate(times, flag);
bits_num = rounddown(bits_num, times);
if (!bits_num)
return -EINVAL;
if (intpartid_free_map)
kfree(intpartid_free_map);
intpartid_free_map = bitmap_zalloc(bits_num, GFP_KERNEL);
if (!intpartid_free_map)
return -ENOMEM;
bitmap_set(intpartid_free_map, 0, bits_num);
/* CLOSID 0 is always reserved for the default group */
pos = find_first_bit(intpartid_free_map, bits_num);
bitmap_clear(intpartid_free_map, pos, times);
return 0;
}
/**
 * struct rmid_transform - Matrix for transforming rmid to partid and pmg
 * @rows: Number of bits in each remap_body[] bitmap
 * @cols: Number of bitmaps (entries in remap_body)
 * @nr_usage: Number of rmids available
 * @step_size: Step size used when traversing the matrix
 * @step_cnt: Number of entries handled per traversal (e.g. 2 with CDP)
 * @remap_body: Array of @cols pointers, one bitmap per column
 */
struct rmid_transform {
u32 rows;
u32 cols;
u32 nr_usage;
int step_size;
int step_cnt;
unsigned long **remap_body;
};
/* The single rmid remap matrix instance, set up by set_rmid_remap_matrix(). */
static struct rmid_transform rmid_remap_matrix;
/* Enabled once the matrix body is allocated; gates every matrix access. */
DEFINE_STATIC_KEY_FALSE(rmid_remap_enable_key);
/* Number of rmids provided by the remap matrix, or 0 if it is disabled. */
static u32 get_nr_rmids(void)
{
	if (static_branch_likely(&rmid_remap_enable_key))
		return rmid_remap_matrix.nr_usage;

	return 0;
}
/*
* a rmid remap matrix is delivered for transforming partid pmg to rmid,
* this matrix is organized like this:
*
* [bitmap entry indexed by partid]
*
* [0] [1] [2] [3] [4] [5]
* occ 1 0 0 1 1 1
* bitmap[:0] 1 0 0 1 1 1
* bitmap[:1] 1 1 1 1 1 1
* bitmap[:2] 1 1 1 1 1 1
* [pos is pmg]
*
* Calculate rmid = partid + NR_partid * pmg
*
* occ indicates whether this bitmap is already in use by a partid; it is
* needed because a given partid must not be paired with a duplicated pmg
* for monitoring. This design saves a lot of space and also reduces the
* time complexity of allocating and freeing an rmid from O(NR_partid) *
* O(NR_pmg) to O(NR_partid) + O(log(NR_pmg)) compared with using a list.
*/
/*
 * Free the first @nr_cols column bitmaps of the matrix body and the body
 * itself, leaving remap_body NULL. Does nothing if there is no body.
 */
static void rmid_remap_body_release(u32 nr_cols)
{
	u32 col;

	if (!rmid_remap_matrix.remap_body)
		return;

	for (col = 0; col < nr_cols; col++)
		kfree(rmid_remap_matrix.remap_body[col]);

	kfree(rmid_remap_matrix.remap_body);
	rmid_remap_matrix.remap_body = NULL;
}

/*
 * (Re)create the rmid remap matrix for @rows pmgs and @cols partids.
 *
 * @cols is rounded down to a multiple of the allocation step (2 with CDP,
 * so LxCODE and LxDATA each get a partid), and that rounded value is used
 * for everything: allocation, initialization, nr_usage and cleanup. One
 * extra row is added per bitmap to hold the "occupied" flag, and all bits
 * start out set (free).
 *
 * Arguments are validated before any matrix state is touched, and a
 * previously created body is released using its own column count.
 *
 * Returns 0 on success, -EINVAL for unusable dimensions, -ENOMEM on
 * allocation failure (the matrix is then disabled).
 */
static int set_rmid_remap_matrix(u32 rows, u32 cols)
{
	u32 times, flag;
	u32 nr_cols, col;

	if (rows == 0 || cols == 0)
		return -EINVAL;

	/*
	 * cols stands for partid, so if cdp enabled we must
	 * keep at least two partid for LxCODE and LxDATA
	 * respectively once time.
	 */
	hw_alloc_times_validate(times, flag);
	nr_cols = rounddown(cols, times);
	if (times > nr_cols)
		return -EINVAL;

	/* Drop the previous body while its column count is still known. */
	rmid_remap_body_release(rmid_remap_matrix.cols);

	rmid_remap_matrix.cols = nr_cols;
	rmid_remap_matrix.step_cnt = times;
	/*
	 * if only pmg(Performance Monitor Group)
	 * work on the monitor, step_size must be
	 * set to maximum number of columns,
	 * otherwise set it to 1, such as kunpeng
	 * 920 does.
	 */
	rmid_remap_matrix.step_size = 1;
	/*
	 * first row of rmid remap matrix is used for indicating
	 * if remap bitmap is occupied by a col index.
	 */
	rmid_remap_matrix.rows = rows + 1;
	/* rmid = partid + cols * pmg, so only rows * nr_cols ids exist. */
	rmid_remap_matrix.nr_usage = rows * nr_cols;

	rmid_remap_matrix.remap_body = kcalloc(nr_cols,
			sizeof(*rmid_remap_matrix.remap_body), GFP_KERNEL);
	if (!rmid_remap_matrix.remap_body)
		goto fail;

	for (col = 0; col < nr_cols; col++) {
		rmid_remap_matrix.remap_body[col] =
			bitmap_zalloc(rmid_remap_matrix.rows, GFP_KERNEL);
		if (!rmid_remap_matrix.remap_body[col])
			goto fail;

		bitmap_set(rmid_remap_matrix.remap_body[col],
			   0, rmid_remap_matrix.rows);
	}

	/* make column entry of rmid matrix visible */
	static_branch_enable_cpuslocked(&rmid_remap_enable_key);

	return 0;

fail:
	rmid_remap_body_release(nr_cols);
	/* if recreation failed, cannot use rmid remap matrix */
	static_branch_disable_cpuslocked(&rmid_remap_enable_key);

	return -ENOMEM;
}
static u32 probe_rmid_remap_matrix_cols(void)
{
return (u32)num_reqpartid;
}
static u32 probe_rmid_remap_matrix_rows(void)
{
return (u32)mpam_sysprops_num_pmg();
}
/*
 * Return the address of the matrix slot holding column @col's bitmap, or
 * NULL when the matrix is disabled or @col is out of range.
 */
static inline unsigned long **__rmid_remap_bmp(u32 col)
{
	if (static_branch_likely(&rmid_remap_enable_key) &&
	    col < rmid_remap_matrix.cols)
		return &rmid_remap_matrix.remap_body[col];

	return NULL;
}
/*
* These macros define how the rmid remap matrix is traversed; there are
* three scenarios:
*
* (1) step_size is default set to 1, if only PMG(NR_PMG=4) works, makes
* it equals to number of columns, step_cnt means how many times are
* allocated and released each time, at this time rmid remap matrix
* looks like:
*
* ^
* |
* ------column------>
*
* RMID 0 1 2 3 (step_size=1)
* `---'
* `--> (step_cnt=2 if cdp enabled)
*
* RMID 0 1 2 3 (step_size=1)
* `--
* `--> (step_cnt=1 if cdp disabled)
*
* (2) if PARTID(NR_PARTID=4) and PMG(NR_PMG=4) works together, at this
* time rmid remap matrix looks like:
*
* ------------row------------>
* |
* | RMID 0 1 2 3 (step_size=1)
* | `---'
* | `--> (step_cnt=2 if cdp enabled)
* | 4 5 6 7
* | 8 9 10 11
* v 12 13 14 15
*
* (3) step_size not equal to 1, cross-line traversal, but this scenario
* did not happen yet.
*/
/* Start a plain traversal at column @from with zero completed laps. */
#define __xy_initialize(x, y, from) \
(x = from, y = 0)
/* A traversal is over once @y reaches the column count. */
#define __xy_overflow(x, y) \
(y >= rmid_remap_matrix.cols)
/* Advance @x to the next column, wrapping back to 0 after the last one. */
#define __x_forward(x) \
(x = (x + 1) % rmid_remap_matrix.cols)
/* Count one more lap in @y each time @x has wrapped back to 0. */
#define __y_forward(x, y) \
(y += ((x) ? 0 : 1))
/* Start a stepped traversal at column @from; @step counts from 1. */
#define __step_xy_initialize(step, x, y, from) \
(x = from, step = 1, y = 0)
/* True if @from is a multiple of step_size * step_cnt. */
#define __step_align(from) \
(!(from % (rmid_remap_matrix.step_size * \
rmid_remap_matrix.step_cnt)))
/*
 * NOTE: besides @step this expands to the identifiers x and y taken from
 * the caller's scope, so callers must name their cursor variables x and y.
 */
#define __step_overflow(step) \
(__xy_overflow(x, y) || \
(step > rmid_remap_matrix.step_cnt))
#define __step_x_forward(x) \
__x_forward(x)
/* Bump @step whenever @x lands on a multiple of step_size. */
#define __step_forward(step, x) \
(step += ((x % rmid_remap_matrix.step_size) ? 0 : 1))
#define __step_y_forward(x, y) \
__y_forward(x, y)
/*
 * Visit up to step_cnt column bitmaps starting at @from. Nothing is
 * visited unless @from is step-aligned. A NULL slot or bitmap triggers
 * WARN_ON_ONCE() and is skipped; the statement following the macro is the
 * else-branch and runs only for valid entries.
 */
#define for_each_rmid_transform_point_step_from(p_entry, step, x, y, from) \
for (__step_xy_initialize(step, x, y, from), \
(p_entry) = __rmid_remap_bmp((from)); \
__step_align(from) && !__step_overflow(step); \
__step_x_forward(x), \
__step_forward(step, x), \
__step_y_forward(x, y), \
(p_entry) = __rmid_remap_bmp(x)) \
if (unlikely(((p_entry) == NULL) || \
(*p_entry) == NULL)) \
WARN_ON_ONCE(1); \
else
/*
 * Visit column bitmaps starting at @from, wrapping around, until @y
 * reaches the column count. A NULL slot or bitmap triggers WARN_ON_ONCE()
 * and is skipped; the statement following the macro is the else-branch.
 */
#define for_each_rmid_transform_point_from(p_entry, x, y, from) \
for (__xy_initialize(x, y, from), \
(p_entry) = __rmid_remap_bmp((from)); \
!__xy_overflow(x, y); \
__x_forward(x), \
__y_forward(x, y), \
(p_entry) = __rmid_remap_bmp(x)) \
if (unlikely(((p_entry) == NULL) || \
(*p_entry) == NULL)) \
WARN_ON_ONCE(1); \
else
/*
 * Mark the column bitmap @bmp as occupied. Bit 0 is the occupancy flag
 * and uses inverted logic: cleared means occupied.
 */
static void set_rmid_remap_bmp_occ(unsigned long *bmp)
{
	clear_bit(0, bmp);
}
/*
 * Mark the column bitmap @bmp as no longer occupied by setting the
 * occupancy flag (bit 0) again.
 */
static void unset_rmid_remap_bmp_occ(unsigned long *bmp)
{
	set_bit(0, bmp);
}
/*
 * Return 1 if the bit for pmg @b (stored at position @b + 1) is cleared
 * in @bmp, 0 if it is set.
 */
static int is_rmid_remap_bmp_bdr_set(unsigned long *bmp, int b)
{
	return !test_bit(b + 1, bmp);
}
/* Set the bit for pmg @b (position @b + 1) in @bmp; rmid_free() uses this. */
static void rmid_remap_bmp_bdr_set(unsigned long *bmp, int b)
{
	set_bit(b + 1, bmp);
}
/* Clear the bit for pmg @b (position @b + 1) in @bmp, i.e. claim that pmg. */
static void rmid_remap_bmp_bdr_clear(unsigned long *bmp, int b)
{
	clear_bit(b + 1, bmp);
}
/*
 * Return 1 if @bmp is occupied, i.e. its first set bit is not bit 0
 * (the occupancy flag is cleared), 0 otherwise.
 */
static int is_rmid_remap_bmp_occ(unsigned long *bmp)
{
	return find_first_bit(bmp, rmid_remap_matrix.rows) != 0;
}
/*
 * Return non-zero if every pmg bit of @bmp is set: either the whole
 * bitmap is set, or it is occupied (flag bit cleared) and all the
 * remaining rows - 1 bits are set.
 */
static int is_rmid_remap_bmp_full(unsigned long *bmp)
{
	if (is_rmid_remap_bmp_occ(bmp) &&
	    bitmap_weight(bmp, rmid_remap_matrix.rows) ==
	    (rmid_remap_matrix.rows - 1))
		return 1;

	return bitmap_full(bmp, rmid_remap_matrix.rows);
}
/*
 * Pick the column at which an rmid allocation should start.
 *
 * If step_size equals the column count, column 0 is returned. If @partid
 * names a valid column, it is returned when that column is step-aligned
 * and not occupied, otherwise -ENOSPC. When @partid is not a valid column
 * the matrix is scanned from column 0 for the first step-aligned,
 * unoccupied column; -ENOSPC is returned if there is none.
 */
static int rmid_remap_bmp_find_step_entry(int partid)
{
	unsigned long **entry;
	int x, y;

	if (rmid_remap_matrix.step_size == rmid_remap_matrix.cols)
		return 0;

	/* step entry should be non-occupied and aligned */
	entry = __rmid_remap_bmp(partid);
	if (entry) {
		if (is_rmid_remap_bmp_occ(*entry) || !__step_align(partid))
			return -ENOSPC;
		return partid;
	}

	for_each_rmid_transform_point_from(entry, x, y, 0) {
		/*
		 * do not waste partid resource, start
		 * from step aligned position.
		 */
		if (!__step_align(x))
			continue;
		if (!is_rmid_remap_bmp_occ(*entry))
			return x;
	}

	return -ENOSPC;
}
/*
 * Claim the first set bit of @bmp and return its position minus one
 * (the pmg number when the occupancy flag at bit 0 is already cleared).
 * Returns -ENOSPC when no bit is set.
 */
static int rmid_remap_bmp_alloc_pmg(unsigned long *bmp)
{
	int bit = find_first_bit(bmp, rmid_remap_matrix.rows);

	if (bit == rmid_remap_matrix.rows)
		return -ENOSPC;

	clear_bit(bit, bmp);

	return bit - 1;
}
/*
 * Build the rmid remap matrix from the probed dimensions, reserve the
 * default entries and initialize the per-rmid monitor pointers.
 * Returns 0 on success or the first error encountered.
 */
static int rmid_remap_matrix_init(void)
{
	/* x and y must keep these names: __step_overflow() refers to them. */
	int x, y, step, ret;
	unsigned long **bmp;
	u32 nr_cols = probe_rmid_remap_matrix_cols();
	u32 nr_rows = probe_rmid_remap_matrix_rows();

	ret = set_rmid_remap_matrix(nr_rows, nr_cols);
	if (ret)
		return ret;

	/*
	 * Reserve the default rmid for the root resctrl group: with CDP
	 * disabled that is partid 0 / pmg 0; with CDP enabled both
	 * partid 0 and partid 1 are taken.
	 */
	for_each_rmid_transform_point_step_from(bmp, step, x, y, 0) {
		set_rmid_remap_bmp_occ(*bmp);
		rmid_remap_bmp_alloc_pmg(*bmp);
	}

	return rmid_mon_ptrs_init(rmid_remap_matrix.nr_usage);
}
/*
 * Initialize closid and rmid bookkeeping: the closid bitmap first, then
 * the rmid remap matrix. Returns 0 or the first error.
 */
int resctrl_id_init(void)
{
	int err = closid_bitmap_init();

	return err ? err : rmid_remap_matrix_init();
}
static int is_rmid_valid(int rmid)
{
return ((u32)rmid >= rmid_remap_matrix.nr_usage) ? 0 : 1;
}
/* Compose an rmid: rmid = partid + cols * pmg. */
static int to_rmid(int partid, int pmg)
{
	return rmid_remap_matrix.cols * pmg + partid;
}
static int rmid_to_partid_pmg(int rmid, int *partid, int *pmg)
{
if (!is_rmid_valid(rmid))
return -EINVAL;
if (pmg)
*pmg = rmid / rmid_remap_matrix.cols;
if (partid)
*partid = rmid % rmid_remap_matrix.cols;
return 0;
}
/*
 * Allocate an rmid starting at column @partid.
 *
 * With @pmg >= 0 that specific pmg is claimed in each visited column and
 * -EEXIST is returned if it is already taken; with @pmg < 0 the first
 * available pmg of each column is claimed and the pmg of the first column
 * is used for the returned rmid.
 *
 * Returns the rmid (>= 0) on success or a negative errno.
 *
 * NOTE(review): the error exits taken from inside the loop, and the
 * -ENOSPC exit after it, do not undo the occupancy flag or the pmg bits
 * already cleared; only the assoc_rmid_with_mon() failure path releases
 * them through rmid_free(). Confirm whether that is intended.
 */
static int __rmid_alloc(int partid, int pmg)
{
/* x and y must keep these names: __step_overflow() refers to them. */
int x, y, step, ret, rmid;
bool checkpmg = false;
unsigned long **bmp;
if (pmg >= 0)
checkpmg = true;
/* traverse from first non-occupied and step-aligned entry */
ret = rmid_remap_bmp_find_step_entry(partid);
if (ret < 0)
goto out;
partid = ret;
for_each_rmid_transform_point_step_from(bmp, step, x, y, partid) {
set_rmid_remap_bmp_occ(*bmp);
/* checking if the given pmg is available */
if (checkpmg) {
/*
* it can only happened in step_size aligned
* position, so it does not exist pmgs cleared
* before.
*/
if (is_rmid_remap_bmp_bdr_set(*bmp, pmg + y)) {
ret = -EEXIST;
goto out;
}
rmid_remap_bmp_bdr_clear(*bmp, pmg + y);
continue;
}
/* alloc available pmg */
ret = rmid_remap_bmp_alloc_pmg(*bmp);
if (ret < 0)
goto out;
/* always return first pmg */
if (pmg < 0)
pmg = ret;
}
rmid = to_rmid(partid, pmg);
if (!is_rmid_valid(rmid)) {
ret = -ENOSPC;
goto out;
}
/* bind the new rmid to a monitor; give the rmid back on failure */
ret = assoc_rmid_with_mon(rmid);
if (ret) {
rmid_free(rmid);
goto out;
}
return rmid;
out:
return ret;
}
/*
 * Allocate an rmid for @partid, letting the allocator pick the pmg.
 * Returns the rmid or a negative errno.
 */
int rmid_alloc(int partid)
{
	const int any_pmg = -1;

	return __rmid_alloc(partid, any_pmg);
}
/*
 * Release @rmid: set its pmg bit again in each column visited from its
 * partid, drop the occupancy flag of a column once all of its pmg bits
 * are set, then detach the rmid from its monitor. An invalid @rmid is
 * silently ignored.
 */
void rmid_free(int rmid)
{
/* x and y must keep these names: __step_overflow() refers to them. */
int x, y, step, partid, pmg;
unsigned long **bmp;
if (rmid_to_partid_pmg(rmid, &partid, &pmg))
return;
for_each_rmid_transform_point_step_from(bmp, step, x, y, partid) {
rmid_remap_bmp_bdr_set(*bmp, pmg + y);
if (is_rmid_remap_bmp_full(*bmp))
unset_rmid_remap_bmp_occ(*bmp);
}
deassoc_rmid_with_mon(rmid);
}
/*
 * Exported wrapper around rmid_to_partid_pmg(): split @rmid into partid
 * and pmg. Returns 0 on success, -EINVAL for an invalid rmid.
 */
int mpam_rmid_to_partid_pmg(int rmid, int *partid, int *pmg)
{
	int err = rmid_to_partid_pmg(rmid, partid, pmg);

	return err;
}
EXPORT_SYMBOL(mpam_rmid_to_partid_pmg);
/*
* If cdp enabled, allocate two closid once time, then return first
* allocated id.
*/
int closid_alloc(void)
{
int pos;
u32 times, flag;
hw_alloc_times_validate(times, flag);
pos = find_first_bit(intpartid_free_map, num_intpartid);
if (pos == num_intpartid)
return -ENOSPC;
bitmap_clear(intpartid_free_map, pos, times);
return pos;
}
/*
 * Return @closid to the free map. The same number of consecutive bits
 * that closid_alloc() claimed is marked free again.
 */
void closid_free(int closid)
{
	u32 times, flag;

	hw_alloc_times_validate(times, flag);

	bitmap_set(intpartid_free_map, closid, times);
}
/*
 * Grow max_name_width and max_data_width to fit the longest resource
 * name and the largest data_width among the exported resources.
 * Resources without a raw descriptor contribute only their name.
 */
static void mpam_init_padding(void)
{
	struct mpam_resctrl_res *res;

	for_each_supported_resctrl_exports(res) {
		struct resctrl_resource *r = &res->resctrl_res;
		struct raw_resctrl_resource *rr = r->res;
		int width = strlen(r->name);

		if (max_name_width < width)
			max_name_width = width;

		if (!rr)
			continue;

		width = rr->data_width;
		if (max_data_width < width)
			max_data_width = width;
	}
}
/*
 * Enable the resctrl static keys that match the detected capabilities:
 * the alloc key, the mon key, and the global key if either is capable.
 */
void post_resctrl_mount(void)
{
	if (!rdt_alloc_capable && !rdt_mon_capable)
		return;

	if (rdt_alloc_capable)
		static_branch_enable_cpuslocked(&resctrl_alloc_enable_key);

	if (rdt_mon_capable)
		static_branch_enable_cpuslocked(&resctrl_mon_enable_key);

	static_branch_enable_cpuslocked(&resctrl_enable_key);
}
/* Intentionally empty: there is nothing to release here. */
void release_rdtgroupfs_options(void)
{
}
void disable_cdp(void)
{
struct mpam_resctrl_res *res;
struct resctrl_resource *r;
for_each_supported_resctrl_exports(res) {
r = &res->resctrl_res;
r->cdp_enable = false;
}
resctrl_cdp_enabled = false;
}
/*
 * Enable CDP on the resource at @level and set the global CDP flag.
 * Returns -EINVAL if the resource is missing or not CDP capable.
 */
static int try_to_enable_cdp(enum resctrl_resource_level level)
{
	struct resctrl_resource *res = mpam_resctrl_get_resource(level);

	if (res && res->cdp_capable) {
		res->cdp_enable = true;
		resctrl_cdp_enabled = true;
		return 0;
	}

	return -EINVAL;
}
int cdpl3_enable(void)
{
return try_to_enable_cdp(RDT_RESOURCE_L3);
}
int cdpl2_enable(void)
{
return try_to_enable_cdp(RDT_RESOURCE_L2);
}
/* Enable the SCHEMA_COMM control feature on every exported resource. */
void basic_ctrl_enable(void)
{
	struct mpam_resctrl_res *res;

	for_each_supported_resctrl_exports(res) {
		struct raw_resctrl_resource *rr = res->resctrl_res.res;

		/* At least SCHEMA_COMM is supported */
		rr->ctrl_features[SCHEMA_COMM].enabled = true;
	}
}
/*
 * Enable the extended control feature named @tok on every
 * allocation-capable resource that has a capable feature of that name.
 * A feature whose flags equal SCHEMA_COMM replaces the basic SCHEMA_COMM
 * feature, which is then disabled on that resource.
 *
 * Returns 0 if at least one feature matched, -EINVAL otherwise.
 */
int extend_ctrl_enable(char *tok)
{
	struct mpam_resctrl_res *res;
	struct resctrl_resource *r;
	struct raw_resctrl_resource *rr;
	struct resctrl_ctrl_feature *feature;
	enum resctrl_ctrl_type type;
	bool found = false;

	for_each_supported_resctrl_exports(res) {
		r = &res->resctrl_res;
		if (!r->alloc_capable)
			continue;

		rr = r->res;
		for_each_extend_ctrl_type(type) {
			feature = &rr->ctrl_features[type];

			if (!feature->capable || !feature->name ||
			    strcmp(feature->name, tok))
				continue;

			feature->enabled = true;
			/*
			 * A feature that also covers SCHEMA_COMM takes its
			 * place, so SCHEMA_COMM itself is deselected.
			 */
			if (feature->flags == SCHEMA_COMM)
				rr->ctrl_features[SCHEMA_COMM].enabled = false;

			found = true;
		}
	}

	return found ? 0 : -EINVAL;
}
/* Disable every extended control feature on every exported resource. */
void extend_ctrl_disable(void)
{
	struct mpam_resctrl_res *res;
	enum resctrl_ctrl_type type;

	for_each_supported_resctrl_exports(res) {
		struct raw_resctrl_resource *rr = res->resctrl_res.res;

		for_each_extend_ctrl_type(type)
			rr->ctrl_features[type].enabled = false;
	}
}
/*
* This is safe against intel_resctrl_sched_in() called from __switch_to()
* because __switch_to() is executed with interrupts disabled. A local call
* from update_closid_rmid() is proteced against __switch_to() because
* preemption is disabled.
*/
void update_cpu_closid_rmid(void *info)
{
struct rdtgroup *r = info;
if (r) {
this_cpu_write(pqr_state.default_closid, resctrl_navie_closid(r->closid));
this_cpu_write(pqr_state.default_rmid, resctrl_navie_rmid(r->mon.rmid));
}
/*
* We cannot unconditionally write the MSR because the current
* executing task might have its own closid selected. Just reuse
* the context switch code.
*/
mpam_sched_in();
}
/*
 * Run update_cpu_closid_rmid() for group @r on every CPU in @cpu_mask:
 * directly on the current CPU if it is in the mask, and through
 * smp_call_function_many() (waiting for completion) on the others.
 *
 * Per task closids/rmids must have been set up before calling this function.
 */
void
update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
	int this_cpu = get_cpu();

	if (cpumask_test_cpu(this_cpu, cpu_mask))
		update_cpu_closid_rmid(r);

	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);

	put_cpu();
}
/*
 * Task-work item queued by __resctrl_group_move_task() and consumed by
 * move_myself().
 */
struct task_move_callback {
/* task_work linkage; its func is set to move_myself() */
struct callback_head work;
/* destination group, pinned through its waitcount while queued */
struct rdtgroup *rdtgrp;
};
/*
 * Task-work callback run in the context of the task being moved.
 *
 * Drops the waitcount reference taken when the work was queued. If that
 * was the last reference and the group has been deleted in the meantime,
 * the task falls back to closid/rmid 0 and the group is freed. Finally
 * the context-switch hook is re-run with preemption disabled and the
 * callback itself is freed.
 */
static void move_myself(struct callback_head *head)
{
	struct task_move_callback *cb =
		container_of(head, struct task_move_callback, work);
	struct rdtgroup *grp = cb->rdtgrp;

	/*
	 * If resource group was deleted before this task work callback
	 * was invoked, then assign the task to root group and free the
	 * resource group.
	 */
	if (atomic_dec_and_test(&grp->waitcount) &&
	    (grp->flags & RDT_DELETED)) {
		current->closid = 0;
		current->rmid = 0;
		kfree(grp);
	}

	/* update PQR_ASSOC MSR to make resource group go into effect */
	preempt_disable();
	mpam_sched_in();
	preempt_enable();

	kfree(cb);
}
/*
 * Move @tsk into @rdtgrp.
 *
 * A task-work callback (move_myself) is queued on @tsk, with the group
 * pinned through its waitcount. On success the task's closid and rmid
 * are switched to the group's. A task may only be moved into a monitor
 * group whose parent control group it already belongs to.
 *
 * Returns 0 on success, -ENOMEM if the callback cannot be allocated, the
 * task_work_add() error if the work cannot be queued, or -EINVAL for a
 * monitor group under a different control group (the already queued
 * callback is left in place in that case).
 */
int __resctrl_group_move_task(struct task_struct *tsk,
			      struct rdtgroup *rdtgrp)
{
	struct task_move_callback *cb;
	int err;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (!cb)
		return -ENOMEM;

	cb->work.func = move_myself;
	cb->rdtgrp = rdtgrp;

	/*
	 * Take a refcount, so rdtgrp cannot be freed before the
	 * callback has been invoked.
	 */
	atomic_inc(&rdtgrp->waitcount);

	err = task_work_add(tsk, &cb->work, true);
	if (err) {
		/*
		 * Task is exiting. Drop the refcount and free the callback.
		 * No need to check the refcount as the group cannot be
		 * deleted before the write function unlocks resctrl_group_mutex.
		 */
		atomic_dec(&rdtgrp->waitcount);
		kfree(cb);
		rdt_last_cmd_puts("task exited\n");
		return err;
	}

	/*
	 * For ctrl_mon groups move both closid and rmid.
	 * For monitor groups, can move the tasks only from
	 * their parent CTRL group.
	 */
	if (rdtgrp->type == RDTMON_GROUP &&
	    rdtgrp->mon.parent->closid.intpartid != tsk->closid) {
		rdt_last_cmd_puts("Can't move task to different control group\n");
		return -EINVAL;
	}

	if (rdtgrp->type == RDTCTRL_GROUP || rdtgrp->type == RDTMON_GROUP) {
		tsk->closid = resctrl_navie_closid(rdtgrp->closid);
		tsk->rmid = resctrl_navie_rmid(rdtgrp->mon.rmid);
	}

	return 0;
}
/*
 * kernfs seq_show entry point: forward to the rftype's seq_show handler
 * if it has one, otherwise report success with no output.
 */
static int resctrl_group_seqfile_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	struct rftype *rft = of->kn->priv;

	return rft->seq_show ? rft->seq_show(of, m, arg) : 0;
}
/*
 * kernfs write entry point: forward to the rftype's write handler, or
 * fail with -EINVAL if the file has none.
 */
static ssize_t resctrl_group_file_write(struct kernfs_open_file *of, char *buf,
					size_t nbytes, loff_t off)
{
	struct rftype *rft = of->kn->priv;

	if (!rft->write)
		return -EINVAL;

	return rft->write(of, buf, nbytes, off);
}
/*
 * kernfs operations for resctrl files: reads and writes are dispatched
 * to the per-file rftype handlers by the two wrappers named here.
 */
struct kernfs_ops resctrl_group_kf_single_ops = {
.atomic_write_len = PAGE_SIZE,
.write = resctrl_group_file_write,
.seq_show = resctrl_group_seqfile_show,
};
/* True if the opened file carries the RFTYPE_FLAGS_CPUS_LIST flag. */
static bool is_cpu_list(struct kernfs_open_file *of)
{
	struct rftype *rft = of->kn->priv;

	return (rft->flags & RFTYPE_FLAGS_CPUS_LIST) != 0;
}
/*
 * Print the CPU mask of the group behind @of, as a list for files flagged
 * as CPU lists and as a bitmap otherwise. Returns -ENOENT when the group
 * cannot be locked live, 0 otherwise.
 */
static int resctrl_group_cpus_show(struct kernfs_open_file *of,
				   struct seq_file *s, void *v)
{
	struct rdtgroup *grp;
	int ret = -ENOENT;

	grp = resctrl_group_kn_lock_live(of->kn);
	if (grp) {
		seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
			   cpumask_pr_args(&grp->cpu_mask));
		ret = 0;
	}
	resctrl_group_kn_unlock(of->kn);

	return ret;
}
/*
 * Remove the CPUs in @m from group @r, then trim every child monitor
 * group's mask so it stays within the parent's remaining CPUs.
 */
static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
{
	struct list_head *children = &r->mon.crdtgrp_list;
	struct rdtgroup *child;

	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);

	/* Children may only keep CPUs the parent still owns. */
	list_for_each_entry(child, children, mon.crdtgrp_list)
		cpumask_and(&child->cpu_mask, &r->cpu_mask, &child->cpu_mask);
}
/*
 * Apply a new CPU mask to a control group.
 *
 * @rdtgrp:   control group being updated
 * @newmask:  requested CPU mask
 * @tmpmask:  caller-provided scratch mask
 * @tmpmask1: second caller-provided scratch mask
 *
 * CPUs dropped from the group are handed to the default group; CPUs added
 * to the group are removed from whichever other group (and its children)
 * held them. Returns 0, or -EINVAL when CPUs would be dropped from the
 * default group.
 */
int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
		    cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
{
	struct rdtgroup *other, *child;

	/* Step 1: CPUs the group owns now but which are absent from newmask. */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (cpumask_weight(tmpmask) != 0) {
		/* The default group has nowhere to give its CPUs to. */
		if (rdtgrp == &resctrl_group_default) {
			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
			return -EINVAL;
		}

		/* Dropped CPUs fall back to the default group. */
		cpumask_or(&resctrl_group_default.cpu_mask,
			   &resctrl_group_default.cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, &resctrl_group_default);
	}

	/*
	 * Step 2: CPUs newly added to the group. Take them away from every
	 * other group (and that group's children) and refresh the per-cpu
	 * closid/rmid.
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (cpumask_weight(tmpmask) != 0) {
		list_for_each_entry(other, &resctrl_all_groups,
				    resctrl_group_list) {
			if (other != rdtgrp) {
				cpumask_and(tmpmask1, &other->cpu_mask,
					    tmpmask);
				if (cpumask_weight(tmpmask1) != 0)
					cpumask_rdtgrp_clear(other, tmpmask1);
			}
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Step 3: the group now owns exactly newmask. */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	/*
	 * Step 4: the parent mask changed, so every child monitor group is
	 * emptied; CPUs a child loses are re-pointed at the parent's ids.
	 */
	list_for_each_entry(child, &rdtgrp->mon.crdtgrp_list,
			    mon.crdtgrp_list) {
		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &child->cpu_mask);
		update_closid_rmid(tmpmask, rdtgrp);
		cpumask_clear(&child->cpu_mask);
	}

	return 0;
}
/*
 * Apply a new CPU mask to a monitor group.
 *
 * @rdtgrp:  monitor group being updated
 * @newmask: requested CPU mask
 * @tmpmask: caller-provided scratch mask
 *
 * Every requested CPU must already belong to the parent control group.
 * Returns 0 on success or -EINVAL otherwise.
 */
int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
		   cpumask_var_t tmpmask)
{
	struct rdtgroup *parent = rdtgrp->mon.parent;
	struct rdtgroup *sibling;

	/* Reject CPUs that are outside the parent control group. */
	cpumask_andnot(tmpmask, newmask, &parent->cpu_mask);
	if (cpumask_weight(tmpmask) != 0) {
		rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n");
		return -EINVAL;
	}

	/* CPUs leaving this group go back to the parent. */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (cpumask_weight(tmpmask) != 0) {
		cpumask_or(&parent->cpu_mask, &parent->cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, parent);
	}

	/*
	 * CPUs joining this group are taken from the sibling monitor groups
	 * and get their per-cpu rmid refreshed.
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (cpumask_weight(tmpmask) != 0) {
		list_for_each_entry(sibling, &parent->mon.crdtgrp_list,
				    mon.crdtgrp_list) {
			if (sibling != rdtgrp)
				cpumask_andnot(&sibling->cpu_mask,
					       &sibling->cpu_mask, tmpmask);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* The group now owns exactly newmask. */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	return 0;
}
/*
 * Write handler for the "cpus" and "cpus_list" files.
 *
 * Parses @buf as a CPU bitmask or CPU list (depending on the file's
 * flags), rejects offline CPUs and hands the mask to cpus_ctrl_write() or
 * cpus_mon_write() according to the group type.
 *
 * Returns @nbytes on success or a negative errno.
 */
static ssize_t resctrl_group_cpus_write(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	cpumask_var_t tmpmask, newmask, tmpmask1;
	struct rdtgroup *grp;
	int ret;

	if (!buf)
		return -EINVAL;

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;
	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto free_tmpmask;
	}
	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto free_newmask;
	}

	grp = resctrl_group_kn_lock_live(of->kn);
	rdt_last_cmd_clear();
	if (!grp) {
		ret = -ENOENT;
		rdt_last_cmd_puts("directory was removed\n");
		goto unlock;
	}

	ret = is_cpu_list(of) ? cpulist_parse(buf, newmask) :
				cpumask_parse(buf, newmask);
	if (ret) {
		rdt_last_cmd_puts("bad cpu list/mask\n");
		goto unlock;
	}

	/* Only online CPUs may be assigned. */
	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
	if (cpumask_weight(tmpmask) != 0) {
		ret = -EINVAL;
		rdt_last_cmd_puts("can only assign online cpus\n");
		goto unlock;
	}

	switch (grp->type) {
	case RDTCTRL_GROUP:
		ret = cpus_ctrl_write(grp, newmask, tmpmask, tmpmask1);
		break;
	case RDTMON_GROUP:
		ret = cpus_mon_write(grp, newmask, tmpmask);
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock:
	resctrl_group_kn_unlock(of->kn);
	free_cpumask_var(tmpmask1);
free_newmask:
	free_cpumask_var(newmask);
free_tmpmask:
	free_cpumask_var(tmpmask);

	return ret ?: nbytes;
}
static int resctrl_group_task_write_permission(struct task_struct *task,
struct kernfs_open_file *of)
{
const struct cred *tcred = get_task_cred(task);
const struct cred *cred = current_cred();
int ret = 0;
/*
* Even if we're attaching all tasks in the thread group, we only
* need to check permissions on one of them.
*/
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
ret = -EPERM;
}
put_cred(tcred);
return ret;
}
/*
 * Move the task identified by @pid (or the current task when @pid is 0)
 * into @rdtgrp, after checking the caller's permission.
 *
 * Returns 0 on success, -ESRCH when no such task exists, or the error
 * from the permission check / the move itself.
 */
static int resctrl_group_move_task(pid_t pid, struct rdtgroup *rdtgrp,
				   struct kernfs_open_file *of)
{
	struct task_struct *target;
	int ret;

	rcu_read_lock();
	target = pid ? find_task_by_vpid(pid) : current;
	if (!target) {
		rcu_read_unlock();
		rdt_last_cmd_printf("No task %d\n", pid);
		return -ESRCH;
	}
	/* Hold a reference so the task stays valid outside the RCU section. */
	get_task_struct(target);
	rcu_read_unlock();

	ret = resctrl_group_task_write_permission(target, of);
	if (ret == 0)
		ret = __resctrl_group_move_task(target, rdtgrp);

	put_task_struct(target);
	return ret;
}
/*
 * Text describing the outcome of the last command, shown by the
 * "last_cmd_status" file. The seq_buf is bound to the 512-byte backing
 * array by seq_buf_init() in mpam_resctrl_init(); the accessors below
 * assert that resctrl_group_mutex is held.
 */
static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];
/*
 * Reset the last command status buffer to empty.
 * Caller must hold resctrl_group_mutex (checked by lockdep).
 */
void rdt_last_cmd_clear(void)
{
lockdep_assert_held(&resctrl_group_mutex);
seq_buf_clear(&last_cmd_status);
}
/*
 * Append the string @s to the last command status buffer.
 * Caller must hold resctrl_group_mutex (checked by lockdep).
 */
void rdt_last_cmd_puts(const char *s)
{
lockdep_assert_held(&resctrl_group_mutex);
seq_buf_puts(&last_cmd_status, s);
}
/*
 * Append a printf-style formatted message to the last command status
 * buffer. Caller must hold resctrl_group_mutex (checked by lockdep).
 */
void rdt_last_cmd_printf(const char *fmt, ...)
{
	va_list args;

	lockdep_assert_held(&resctrl_group_mutex);

	va_start(args, fmt);
	seq_buf_vprintf(&last_cmd_status, fmt, args);
	va_end(args);
}
/*
 * Show handler for "last_cmd_status": print the recorded status text, or
 * "ok" when the status buffer is empty. Always returns 0.
 */
static int resctrl_last_cmd_status_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	int used;

	mutex_lock(&resctrl_group_mutex);
	used = seq_buf_used(&last_cmd_status);
	if (!used)
		seq_puts(seq, "ok\n");
	else
		seq_printf(seq, "%.*s", used, last_cmd_status_buf);
	mutex_unlock(&resctrl_group_mutex);

	return 0;
}
/*
 * Show handler for "num_closids": print get_nr_closid() divided by the
 * "times" factor filled in by hw_alloc_times_validate().
 */
static int resctrl_num_closids_show(struct kernfs_open_file *of,
				    struct seq_file *seq, void *v)
{
	u32 times;
	u32 flag;

	hw_alloc_times_validate(times, flag);

	seq_printf(seq, "%u\n", get_nr_closid() / times);

	return 0;
}
/*
 * Show handler for "cbm_mask": print, in hex, the default control value
 * of the SCHEMA_COMM feature of the resource attached to the parent
 * directory.
 */
static int resctrl_cbm_mask_show(struct kernfs_open_file *of,
				 struct seq_file *seq, void *v)
{
	struct resctrl_resource *resource;
	struct raw_resctrl_resource *raw;

	resource = of->kn->parent->priv;
	raw = resource->res;

	seq_printf(seq, "%x\n", raw->ctrl_features[SCHEMA_COMM].default_ctrl);

	return 0;
}
/*
 * Show handler for "min_cbm_bits": print cache.min_cbm_bits of the
 * resource attached to the parent directory.
 */
static int resctrl_min_cbm_bits_show(struct kernfs_open_file *of,
				     struct seq_file *seq, void *v)
{
	struct resctrl_resource *resource;

	resource = of->kn->parent->priv;
	seq_printf(seq, "%u\n", resource->cache.min_cbm_bits);

	return 0;
}
/*
 * Show handler for "shareable_bits": print, in hex, cache.shareable_bits
 * of the resource attached to the parent directory.
 */
static int resctrl_shareable_bits_show(struct kernfs_open_file *of,
				       struct seq_file *seq, void *v)
{
	struct resctrl_resource *resource;

	resource = of->kn->parent->priv;
	seq_printf(seq, "%x\n", resource->cache.shareable_bits);

	return 0;
}
/*
 * Show handler for "features": for every enabled extended control type
 * print one "<name>@<max>" line, where <max> is max_wd - 1 rendered in
 * the feature's numeric base (decimal for base 10, hex for base 16).
 * Features with any other base print nothing.
 */
static int resctrl_features_show(struct kernfs_open_file *of,
				 struct seq_file *seq, void *v)
{
	struct resctrl_resource *resource = of->kn->parent->priv;
	struct raw_resctrl_resource *raw = resource->res;
	enum resctrl_ctrl_type type;

	for_each_extend_ctrl_type(type) {
		if (raw->ctrl_features[type].enabled) {
			/*
			 * The range of a ctrl feature is kept as an integer;
			 * user space is given its maximum upper bound.
			 */
			if (raw->ctrl_features[type].base == 10)
				seq_printf(seq, "%s@%u\n",
					   raw->ctrl_features[type].name,
					   raw->ctrl_features[type].max_wd - 1);
			else if (raw->ctrl_features[type].base == 16)
				seq_printf(seq, "%s@%x\n",
					   raw->ctrl_features[type].name,
					   raw->ctrl_features[type].max_wd - 1);
		}
	}

	return 0;
}
/*
 * Show handler for "min_bandwidth": print mbw.min_bw of the resource
 * attached to the parent directory.
 */
static int resctrl_min_bandwidth_show(struct kernfs_open_file *of,
				      struct seq_file *seq, void *v)
{
	struct resctrl_resource *resource;

	resource = of->kn->parent->priv;
	seq_printf(seq, "%u\n", resource->mbw.min_bw);

	return 0;
}
/*
 * Show handler for "bandwidth_gran": print mbw.bw_gran of the resource
 * attached to the parent directory.
 */
static int resctrl_bandwidth_gran_show(struct kernfs_open_file *of,
				       struct seq_file *seq, void *v)
{
	struct resctrl_resource *resource;

	resource = of->kn->parent->priv;
	seq_printf(seq, "%u\n", resource->mbw.bw_gran);

	return 0;
}
/*
 * Show handler for "num_rmids": print get_nr_rmids() divided by the
 * "times" factor filled in by hw_alloc_times_validate().
 */
static int resctrl_num_rmids_show(struct kernfs_open_file *of,
				  struct seq_file *seq, void *v)
{
	u32 times;
	u32 flag;

	hw_alloc_times_validate(times, flag);

	seq_printf(seq, "%u\n", get_nr_rmids() / times);

	return 0;
}
/*
 * Show handler for "num_monitors": print the resource's num_mon divided
 * by the "times" factor filled in by hw_alloc_times_validate().
 */
static int resctrl_num_monitors_show(struct kernfs_open_file *of,
				     struct seq_file *seq, void *v)
{
	struct resctrl_resource *resource;
	struct raw_resctrl_resource *raw;
	u32 times;
	u32 flag;

	resource = of->kn->parent->priv;
	raw = resource->res;

	hw_alloc_times_validate(times, flag);

	seq_printf(seq, "%u\n", raw->num_mon / times);

	return 0;
}
/*
 * Write handler for "tasks": parse a non-negative pid from @buf and move
 * that task (pid 0 selects the writer itself) into the group owning @of.
 *
 * Returns @nbytes on success, -EINVAL for a malformed or negative pid,
 * -ENOENT when the group can no longer be looked up, or the error from
 * resctrl_group_move_task().
 */
static ssize_t resctrl_group_tasks_write(struct kernfs_open_file *of,
					 char *buf, size_t nbytes, loff_t off)
{
	struct rdtgroup *grp;
	pid_t pid;
	int ret;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return -EINVAL;

	grp = resctrl_group_kn_lock_live(of->kn);
	rdt_last_cmd_clear();

	if (!grp)
		ret = -ENOENT;
	else
		ret = resctrl_group_move_task(pid, grp, of);

	resctrl_group_kn_unlock(of->kn);

	return ret ?: nbytes;
}
/*
 * Print the pid of every thread that belongs to group @r, one per line.
 * Monitor groups match on the task's rmid, control groups on its closid.
 * The thread list is walked under rcu_read_lock().
 */
static void show_resctrl_tasks(struct rdtgroup *r, struct seq_file *s)
{
	struct task_struct *proc, *thread;

	rcu_read_lock();
	for_each_process_thread(proc, thread) {
		bool member = false;

		if (r->type == RDTMON_GROUP)
			member = thread->rmid ==
				 resctrl_navie_rmid(r->mon.rmid);
		else if (r->type == RDTCTRL_GROUP)
			member = thread->closid ==
				 resctrl_navie_closid(r->closid);

		if (member)
			seq_printf(s, "%d\n", thread->pid);
	}
	rcu_read_unlock();
}
/*
 * Show handler for "tasks": list the threads of the group owning @of.
 * Returns -ENOENT when the group can no longer be looked up.
 */
static int resctrl_group_tasks_show(struct kernfs_open_file *of,
				    struct seq_file *s, void *v)
{
	struct rdtgroup *grp = resctrl_group_kn_lock_live(of->kn);
	int ret = -ENOENT;

	if (grp) {
		show_resctrl_tasks(grp, s);
		ret = 0;
	}

	resctrl_group_kn_unlock(of->kn);
	return ret;
}
/*
 * Show handler for "rmid": print the group's monitoring rmid. When
 * hw_alloc_times_validate() sets its flag, the value is printed as the
 * range "<rmid>-<rmid + 1>" instead of a single number.
 * Returns -ENOENT when the group can no longer be looked up.
 */
static int resctrl_group_rmid_show(struct kernfs_open_file *of,
				   struct seq_file *s, void *v)
{
	struct rdtgroup *grp;
	u32 flag, times;
	int ret = -ENOENT;

	hw_alloc_times_validate(times, flag);

	grp = resctrl_group_kn_lock_live(of->kn);
	if (grp) {
		if (!flag)
			seq_printf(s, "%u\n", grp->mon.rmid);
		else
			seq_printf(s, "%u-%u\n", grp->mon.rmid,
				   grp->mon.rmid + 1);
		ret = 0;
	}
	resctrl_group_kn_unlock(of->kn);

	return ret;
}
/*
 * Write handler for "rmid": switch a control group to a user-chosen rmid.
 *
 * The new rmid is parsed from @buf, translated to a (partid, pmg) pair and
 * allocated. The group's reqpartid and rmid are then replaced, the mondata
 * directories and the group configuration are refreshed, and every task
 * whose closid equals the group's intpartid is moved again so it picks up
 * the new ids. If any step after the allocation fails, the previous
 * rmid/reqpartid are restored and the same refresh steps are replayed.
 *
 * Returns @nbytes on success or a negative errno: -EINVAL for a malformed,
 * negative, zero or otherwise invalid rmid and for unsupported groups,
 * -ENOENT when the group can no longer be looked up, or the error of the
 * failing step.
 */
static ssize_t resctrl_group_rmid_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct rdtgroup *rdtgrp;
int ret = 0;
int partid;
int pmg;
int rmid;
int old_rmid;
int old_reqpartid;
struct task_struct *p, *t;
if (kstrtoint(strstrip(buf), 0, &rmid) || rmid < 0)
return -EINVAL;
rdtgrp = resctrl_group_kn_lock_live(of->kn);
if (!rdtgrp) {
ret = -ENOENT;
goto unlock;
}
rdt_last_cmd_clear();
/* rmid 0 can neither be requested nor given up by a group that has it */
if (rmid == 0 || rdtgrp->mon.rmid == 0) {
ret = -EINVAL;
rdt_last_cmd_puts("default rmid 0 is always kept\n");
goto unlock;
}
ret = rmid_to_partid_pmg(rmid, &partid, &pmg);
if (ret < 0) {
ret = -EINVAL;
rdt_last_cmd_puts("invalid rmid\n");
goto unlock;
}
/*
 * Writing the rmid the group already has changes nothing.
 * NOTE(review): ret still holds the non-negative result of
 * rmid_to_partid_pmg() here; if that can be positive it is returned
 * instead of nbytes - confirm it is always 0 on success.
 */
if (rmid == rdtgrp->mon.rmid)
goto unlock;
/* only control groups without child monitor groups are supported */
if (rdtgrp->type != RDTCTRL_GROUP ||
!list_empty(&rdtgrp->mon.crdtgrp_list)) {
ret = -EINVAL;
rdt_last_cmd_puts("unsupported operation\n");
goto unlock;
}
ret = __rmid_alloc(partid, pmg);
if (ret < 0) {
rdt_last_cmd_puts("set rmid failed\n");
goto unlock;
}
/* remember the previous ids so the rollback path can restore them */
old_rmid = rdtgrp->mon.rmid;
old_reqpartid = rdtgrp->closid.reqpartid;
/*
 * intpartid identifies the group for control purposes, while reqpartid
 * is used for configuration synchronization and monitoring, so only
 * the reqpartid is updated here.
 */
rdtgrp->closid.reqpartid = partid;
rdtgrp->mon.rmid = rmid;
/* update rmid for mondata */
ret = resctrl_mkdir_mondata_all_subdir(rdtgrp->mon.mon_data_kn, rdtgrp);
if (ret) {
rdt_last_cmd_puts("update rmid for mondata failed\n");
goto rollback;
}
/* resync groups configuration */
rdtgrp->resync = 1;
ret = resctrl_update_groups_config(rdtgrp);
if (ret) {
rdt_last_cmd_puts("update groups config failed\n");
goto rollback;
}
/*
 * Re-apply the group's ids to every task whose closid matches.
 * NOTE(review): __resctrl_group_move_task() runs under
 * read_lock(&tasklist_lock) here; it appears to allocate with
 * GFP_KERNEL, which may sleep - confirm this is safe in this context.
 */
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
if (t->closid == rdtgrp->closid.intpartid) {
ret = __resctrl_group_move_task(t, rdtgrp);
if (ret) {
read_unlock(&tasklist_lock);
goto rollback;
}
}
}
read_unlock(&tasklist_lock);
update_closid_rmid(&rdtgrp->cpu_mask, rdtgrp);
/* the switch succeeded, release the previous rmid */
rmid_free(old_rmid);
unlock:
resctrl_group_kn_unlock(of->kn);
if (ret)
return ret;
return nbytes;
rollback:
/* restore the previous ids and replay the refresh steps with them */
rdtgrp->mon.rmid = old_rmid;
rdtgrp->closid.reqpartid = old_reqpartid;
/* the old rmid is valid, so mkdir mondata here won't fail */
resctrl_mkdir_mondata_all_subdir(rdtgrp->mon.mon_data_kn, rdtgrp);
rdtgrp->resync = 1;
WARN_ON_ONCE(resctrl_update_groups_config(rdtgrp));
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
if (t->closid == rdtgrp->closid.intpartid)
WARN_ON_ONCE(__resctrl_group_move_task(t, rdtgrp));
}
read_unlock(&tasklist_lock);
/* release the rmid that was allocated for the failed switch */
rmid_free(rmid);
resctrl_group_kn_unlock(of->kn);
return ret;
}
/*
 * Table of resctrl files provided by this file; it is handed to
 * register_resctrl_specific_files() from mpam_resctrl_init().
 *
 * Every entry uses resctrl_group_kf_single_ops, so accesses are dispatched
 * to the entry's own .seq_show / .write handler. The table mixes read-only
 * (0444) information files with read-write (0644) per-group files (cpus,
 * cpus_list, tasks, rmid, schemata). .fflags presumably selects the
 * directories in which a file is created - confirm against the code that
 * consumes the table.
 */
static struct rftype res_specific_files[] = {
{
.name = "last_cmd_status",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_last_cmd_status_show,
.fflags = RF_TOP_INFO,
},
{
.name = "num_closids",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_num_closids_show,
.fflags = RF_CTRL_INFO,
},
{
.name = "cbm_mask",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_cbm_mask_show,
.fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "min_cbm_bits",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_min_cbm_bits_show,
.fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "shareable_bits",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_shareable_bits_show,
.fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "features",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_features_show,
.fflags = RF_CTRL_INFO,
},
{
.name = "min_bandwidth",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_min_bandwidth_show,
.fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
},
{
.name = "bandwidth_gran",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_bandwidth_gran_show,
.fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
},
{
.name = "num_rmids",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_num_rmids_show,
.fflags = RF_MON_INFO,
},
{
.name = "num_monitors",
.mode = 0444,
.kf_ops = &resctrl_group_kf_single_ops,
.seq_show = resctrl_num_monitors_show,
.fflags = RF_MON_INFO,
},
/* "cpus" and "cpus_list" share handlers; only the format flag differs */
{
.name = "cpus",
.mode = 0644,
.kf_ops = &resctrl_group_kf_single_ops,
.write = resctrl_group_cpus_write,
.seq_show = resctrl_group_cpus_show,
.fflags = RFTYPE_BASE,
},
{
.name = "cpus_list",
.mode = 0644,
.kf_ops = &resctrl_group_kf_single_ops,
.write = resctrl_group_cpus_write,
.seq_show = resctrl_group_cpus_show,
.flags = RFTYPE_FLAGS_CPUS_LIST,
.fflags = RFTYPE_BASE,
},
{
.name = "tasks",
.mode = 0644,
.kf_ops = &resctrl_group_kf_single_ops,
.write = resctrl_group_tasks_write,
.seq_show = resctrl_group_tasks_show,
.fflags = RFTYPE_BASE,
},
{
.name = "rmid",
.mode = 0644,
.kf_ops = &resctrl_group_kf_single_ops,
.write = resctrl_group_rmid_write,
.seq_show = resctrl_group_rmid_show,
.fflags = RFTYPE_BASE,
},
{
.name = "schemata",
.mode = 0644,
.kf_ops = &resctrl_group_kf_single_ops,
.write = resctrl_group_schemata_write,
.seq_show = resctrl_group_schemata_show,
.fflags = RF_CTRL_BASE,
}
};
/*
 * Look up the domain with id @id in the domain list of @r.
 *
 * @r:   resource whose ->domains list is searched
 * @id:  domain id to find
 * @pos: optional; when no domain matches, receives the list position at
 *       which the search stopped (the first entry with a larger id, or
 *       the list head when the whole list was walked)
 *
 * The search stops at the first entry with an id larger than @id.
 * Returns the matching domain, NULL when there is none, or ERR_PTR(@id)
 * when @id is negative.
 */
struct rdt_domain *mpam_find_domain(struct resctrl_resource *r, int id,
				    struct list_head **pos)
{
	struct list_head *cursor;
	struct rdt_domain *dom;

	if (id < 0)
		return ERR_PTR(id);

	list_for_each(cursor, &r->domains) {
		dom = list_entry(cursor, struct rdt_domain, list);

		/* Exact match: hand the domain back. */
		if (dom->id == id)
			return dom;

		/* Passed the slot @id would occupy: stop looking. */
		if (dom->id > id)
			break;
	}

	if (pos)
		*pos = cursor;

	return NULL;
}
/* Firmware source selected for MPAM with the "mpam=" boot parameter. */
enum mpam_enable_type __read_mostly mpam_enabled;

/*
 * Parse the "mpam" boot parameter: "mpam=acpi" selects MPAM_ENABLE_ACPI
 * and "mpam=of" selects MPAM_ENABLE_OF. Any other value leaves
 * mpam_enabled untouched. Always returns 1.
 */
static int __init mpam_setup(char *str)
{
	if (strcmp(str, "=acpi") == 0)
		mpam_enabled = MPAM_ENABLE_ACPI;
	else if (strcmp(str, "=of") == 0)
		mpam_enabled = MPAM_ENABLE_OF;

	return 1;
}
__setup("mpam", mpam_setup);
/*
 * Set up the MPAM resctrl glue: initialise padding, register the file
 * table defined in this file, bind the last-command status seq_buf to its
 * backing array and finally initialise the resctrl group layer.
 *
 * Returns the result of resctrl_group_init().
 */
int mpam_resctrl_init(void)
{
mpam_init_padding();
register_resctrl_specific_files(res_specific_files,
ARRAY_SIZE(res_specific_files));
/* must be done before any rdt_last_cmd_*() helper is used */
seq_buf_init(&last_cmd_status, last_cmd_status_buf,
sizeof(last_cmd_status_buf));
return resctrl_group_init();
}
/*
* __intel_rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
*
* Following considerations are made so that this has minimal impact
* on scheduler hot path:
* - This will stay as no-op unless we are running on an Intel SKU
* which supports resource control or monitoring and we enable by
* mounting the resctrl file system.
* - Caches the per cpu CLOSid/RMID values and does the MSR write only
* when a task with a different CLOSid/RMID is scheduled in.
* - We allocate RMIDs/CLOSids globally in order to keep this as
* simple as possible.
* Must be called with preemption disabled.
*/
void __mpam_sched_in(void)
{
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
u64 partid_d, partid_i;
u64 rmid = state->default_rmid;
u64 closid = state->default_closid;
u64 reqpartid = 0;
u64 pmg = 0;
/*
* If this task has a closid/rmid assigned, use it.
* Else use the closid/rmid assigned to this cpu.
*/
if (static_branch_likely(&resctrl_alloc_enable_key)) {
if (current->closid)
closid = current->closid;
}
if (static_branch_likely(&resctrl_mon_enable_key)) {
if (current->rmid)
rmid = current->rmid;
}
if (closid != state->cur_closid || rmid != state->cur_rmid) {
u64 reg;
resctrl_navie_rmid_partid_pmg(rmid, (int *)&reqpartid, (int *)&pmg);
if (resctrl_cdp_enabled) {
resctrl_cdp_mpamid_map_val(reqpartid, CDP_DATA, partid_d);
resctrl_cdp_mpamid_map_val(reqpartid, CDP_CODE, partid_i);
/* set in EL0 */
reg = mpam_read_sysreg_s(SYS_MPAM0_EL1, "SYS_MPAM0_EL1");
reg = PARTID_D_SET(reg, partid_d);
reg = PARTID_I_SET(reg, partid_i);
reg = PMG_SET(reg, pmg);
mpam_write_sysreg_s(reg, SYS_MPAM0_EL1, "SYS_MPAM0_EL1");
/* set in EL1 */
reg = mpam_read_sysreg_s(SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
reg = PARTID_D_SET(reg, partid_d);
reg = PARTID_I_SET(reg, partid_i);
reg = PMG_SET(reg, pmg);
mpam_write_sysreg_s(reg, SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
} else {
/* set in EL0 */
reg = mpam_read_sysreg_s(SYS_MPAM0_EL1, "SYS_MPAM0_EL1");
reg = PARTID_SET(reg, reqpartid);
reg = PMG_SET(reg, pmg);
mpam_write_sysreg_s(reg, SYS_MPAM0_EL1, "SYS_MPAM0_EL1");
/* set in EL1 */
reg = mpam_read_sysreg_s(SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
reg = PARTID_SET(reg, reqpartid);
reg = PMG_SET(reg, pmg);
mpam_write_sysreg_s(reg, SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
}
state->cur_rmid = rmid;
state->cur_closid = closid;
}
}
/*
 * Translate one resctrl control value into the matching field of
 * @mpam_cfg and mark the corresponding MPAM feature as valid.
 *
 * @res:         resource the value belongs to (supplies the class limits)
 * @resctrl_cfg: control value as stored by resctrl
 * @evt:         event id selecting which MPAM field is updated
 * @mpam_cfg:    configuration to update
 *
 * Bandwidth values are rescaled to the width supported by the class;
 * all other values are copied as they are. Unknown events are ignored.
 */
static void
mpam_update_from_resctrl_cfg(struct mpam_resctrl_res *res,
			     u32 resctrl_cfg, enum rdt_event_id evt,
			     struct mpam_config *mpam_cfg)
{
	u64 scale;

	switch (evt) {
	case QOS_MBA_PBM_EVENT_ID:
		/* .. the number of bits we can set */
		scale = res->class->mbw_pbm_bits;
		mpam_cfg->mbw_pbm = (resctrl_cfg * scale) / MAX_MBA_BW;
		mpam_set_feature(mpam_feat_mbw_part, &mpam_cfg->valid);
		break;

	case QOS_MBA_MAX_EVENT_ID:
		scale = MBW_MAX_BWA_FRACT(res->class->bwa_wd);
		mpam_cfg->mbw_max = (resctrl_cfg * scale) / (MAX_MBA_BW - 1);
		/* never exceed the largest value the hardware can express */
		if (mpam_cfg->mbw_max > scale)
			mpam_cfg->mbw_max = scale;
		mpam_set_feature(mpam_feat_mbw_max, &mpam_cfg->valid);
		break;

	case QOS_MBA_MIN_EVENT_ID:
		scale = MBW_MAX_BWA_FRACT(res->class->bwa_wd);
		mpam_cfg->mbw_min = (resctrl_cfg * scale) / (MAX_MBA_BW - 1);
		/* never exceed the largest value the hardware can express */
		if (mpam_cfg->mbw_min > scale)
			mpam_cfg->mbw_min = scale;
		mpam_set_feature(mpam_feat_mbw_min, &mpam_cfg->valid);
		break;

	case QOS_MBA_HDL_EVENT_ID:
		mpam_cfg->hdl = resctrl_cfg;
		mpam_set_feature(mpam_feat_part_hdl, &mpam_cfg->valid);
		break;

	/* the priority field is shared by the bandwidth and cache events */
	case QOS_MBA_INTPRI_EVENT_ID:
	case QOS_CAT_INTPRI_EVENT_ID:
		mpam_cfg->intpri = resctrl_cfg;
		mpam_set_feature(mpam_feat_intpri_part, &mpam_cfg->valid);
		break;

	case QOS_CAT_CPBM_EVENT_ID:
		mpam_cfg->cpbm = resctrl_cfg;
		mpam_set_feature(mpam_feat_cpor_part, &mpam_cfg->valid);
		break;

	case QOS_CAT_CMAX_EVENT_ID:
		mpam_cfg->cmax = resctrl_cfg;
		mpam_set_feature(mpam_feat_ccap_part, &mpam_cfg->valid);
		break;

	default:
		break;
	}
}
/*
 * Copy all control types at once. This looks more efficient, as the
 * devices' state then only needs to be refreshed one time through
 * mpam_component_config; the features will be checked again when the
 * configuration is applied.
 *
 * @r:      resctrl resource being configured
 * @d:      domain holding the resctrl control values
 * @closid: supplies the intpartid (index of the control values) and the
 *          reqpartid (index of the MPAM configuration to rebuild)
 *
 * Caller must hold resctrl_group_mutex. Nothing is done when either
 * partid is out of range.
 */
static void
mpam_resctrl_update_component_cfg(struct resctrl_resource *r,
				  struct rdt_domain *d, struct sd_closid *closid)
{
	struct raw_resctrl_resource *raw = r->res;
	struct mpam_resctrl_dom *mpam_dom;
	struct mpam_resctrl_res *mpam_res;
	struct mpam_config *cfg;
	enum resctrl_ctrl_type type;
	u32 intpartid = closid->intpartid;
	u32 reqpartid = closid->reqpartid;
	u32 ctrl_val;

	lockdep_assert_held(&resctrl_group_mutex);

	/* Out of range */
	if (intpartid >= mpam_sysprops_num_partid() ||
	    reqpartid >= mpam_sysprops_num_partid())
		return;

	mpam_res = container_of(r, struct mpam_resctrl_res, resctrl_res);
	mpam_dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);

	/*
	 * The reqpartid is used for duplicating the master's configuration;
	 * cfg[intpartid] need not duplicate this setting, because only the
	 * reqpartid is each rdtgroup's index into the mpam_cfg array.
	 */
	cfg = &mpam_dom->comp->cfg[reqpartid];
	if (WARN_ON_ONCE(!cfg))
		return;

	/* Rebuild the set of valid features from scratch. */
	cfg->valid = 0;
	for_each_ctrl_type(type) {
		if (raw->ctrl_features[type].enabled) {
			ctrl_val = d->ctrl_val[type][intpartid];
			mpam_update_from_resctrl_cfg(mpam_res, ctrl_val,
				raw->ctrl_features[type].evt, cfg);
		}
	}
}
/*
 * Reset every partid of a domain to the default control values: for each
 * control type the resctrl-side value in @d and the MPAM-side
 * configuration in @dom are both set from the feature's default_ctrl.
 */
static void mpam_reset_cfg(struct mpam_resctrl_res *res,
			   struct mpam_resctrl_dom *dom, struct rdt_domain *d)
{
	struct raw_resctrl_resource *raw = res->resctrl_res.res;
	enum resctrl_ctrl_type type;
	int partid;

	for (partid = 0; partid != mpam_sysprops_num_partid(); partid++) {
		for_each_ctrl_type(type) {
			d->ctrl_val[type][partid] =
				raw->ctrl_features[type].default_ctrl;
			mpam_update_from_resctrl_cfg(res,
				raw->ctrl_features[type].default_ctrl,
				raw->ctrl_features[type].evt,
				&dom->comp->cfg[partid]);
		}
	}
}
/*
 * Restore the default configuration: reset every domain of every
 * allocation-capable exported resource, reset the MPAM devices and clear
 * the CDP enable state.
 */
void resctrl_resource_reset(void)
{
	struct mpam_resctrl_res *res;
	struct rdt_domain *domain;

	for_each_supported_resctrl_exports(res) {
		if (res->resctrl_res.alloc_capable) {
			list_for_each_entry(domain, &res->resctrl_res.domains,
					    list)
				mpam_reset_cfg(res,
					       container_of(domain,
							    struct mpam_resctrl_dom,
							    resctrl_dom),
					       domain);
		}
	}

	mpam_reset_devices();

	/*
	 * reset CDP configuration used in recreating schema list nodes.
	 */
	resctrl_cdp_enabled = false;
}