openEuler_kernel_rk3588/arch/arm64/kernel/mpam/mpam_resctrl.c

// SPDX-License-Identifier: GPL-2.0+
/*
 * Common code for ARM v8 MPAM
 *
 * Copyright (C) 2018-2019 Huawei Technologies Co., Ltd
 *
 * Author: Xie XiuQi <xiexiuqi@huawei.com>
 *
 * Code was partially borrowed from arch/x86/kernel/cpu/intel_rdt*.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * More information about MPAM can be found in the Arm Architecture
 * Reference Manual.
 *
 * https://static.docs.arm.com/ddi0598/a/DDI0598_MPAM_supp_armv8a.pdf
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>
#include <linux/task_work.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/arm_mpam.h>

#include <asm/mpam_sched.h>
#include <asm/mpam.h>
#include <asm/io.h>

#include "mpam_device.h"
#include "mpam_resource.h"
#include "mpam_internal.h"

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(resctrl_group_mutex);

/*
 * The cached intel_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);

/*
 * Used to store the max resource name width and max resource data width
 * to display the schemata in a tabular format
 */
int max_name_width, max_data_width;

/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
 */
bool rdt_alloc_capable;

/*
 * Indicates whether the cdpl2/cdpl3 option has been enabled.
 */
static bool resctrl_cdp_enabled;

/*
 * Hi1620 2P Base Address Map
 *
 * AFF2 | NODE | DIE   | Base Address
 * ------------------------------------
 *   01 |    0 | P0 TB | 0x000098xxxxxx
 *   03 |    1 | P0 TA | 0x000090xxxxxx
 *   05 |    2 | P1 TB | 0x200098xxxxxx
 *   07 |    3 | P2 TA | 0x200090xxxxxx
 *
 *   AFF2: MPIDR.AFF2
 */

/*
 * Add @cpu to the default resctrl group's cpu mask and, if CDP is
 * enabled, refresh the cpus' state of the default group.
 *
 * Always returns 0.
 */
int mpam_resctrl_set_default_cpu(unsigned int cpu)
{
	/* A cpu always lands in the default rdtgroup after coming online. */
	cpumask_set_cpu(cpu, &resctrl_group_default.cpu_mask);

	if (!resctrl_cdp_enabled)
		return 0;

	/* CDP is on: update the CPU mpam sysregs' default setting. */
	resctrl_cdp_update_cpus_state(&resctrl_group_default);

	return 0;
}

void mpam_resctrl_clear_default_cpu(unsigned int cpu)
{
	struct resctrl_group *rdtgrp;

	list_for_each_entry(rdtgrp, &resctrl_all_groups, resctrl_group_list) {
		/* The cpu is clear in associated rdtgroup after offline. */
		cpumask_clear_cpu(cpu, &rdtgrp->cpu_mask);
	}
}

bool is_resctrl_cdp_enabled(void)
{
	return !!resctrl_cdp_enabled;
}

/*
 * Forward declarations for the callbacks referenced by
 * raw_resctrl_resources_all[] below.
 */

/* Copies a closid's staged configuration into the component config. */
static void
mpam_resctrl_update_component_cfg(struct resctrl_resource *r,
	struct rdt_domain *d, struct sd_closid *closid);

/* Control write path shared by all resources (.msr_update). */
static void
common_wrmsr(struct resctrl_resource *r, struct rdt_domain *d,
	struct msr_param *para);

/* Control read paths (.msr_read) for cache and memory bandwidth. */
static u64 cache_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
	struct msr_param *para);
static u64 mbw_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
	struct msr_param *para);

/* Monitor read paths (.mon_read) for cache and memory bandwidth. */
static u64 cache_rdmon(struct rdt_domain *d, void *md_priv);
static u64 mbw_rdmon(struct rdt_domain *d, void *md_priv);

/* Monitor configuration path shared by all resources (.mon_write). */
static int common_wrmon(struct rdt_domain *d, void *md_priv);

/* Schemata value parsers (.parse_ctrlval) for cache and bandwidth. */
static int parse_cache(char *buf, struct resctrl_resource *r,
	struct resctrl_staged_config *cfg, enum resctrl_ctrl_type ctrl_type);
static int parse_bw(char *buf, struct resctrl_resource *r,
	struct resctrl_staged_config *cfg, enum resctrl_ctrl_type ctrl_type);

/*
 * Static description of every resctrl resource handled here, indexed by
 * enum resctrl_resource_level. Each entry wires up the control/monitor
 * callbacks, the schemata print format and the per-control-type feature
 * table (ctrl_features[], indexed by enum resctrl_ctrl_type).
 *
 * For each feature: .base is the numeric base handed to kstrtoul() by the
 * parsers, .evt is the event id used when reading the configuration back,
 * and .ctrl_suffix/.name are the strings associated with the feature.
 * Only SCHEMA_COMM is marked .capable statically in this table.
 */
struct raw_resctrl_resource raw_resctrl_resources_all[] = {
	/* L3 cache: hexadecimal schemata values */
	[RDT_RESOURCE_L3] = {
		.msr_update     = common_wrmsr,
		.msr_read       = cache_rdmsr,
		.parse_ctrlval  = parse_cache,
		.format_str     = "%d=%0*x",
		.mon_read       = cache_rdmon,
		.mon_write      = common_wrmon,
		.fflags         = RFTYPE_RES_CACHE,
		.ctrl_features  = {
			[SCHEMA_COMM] = {
				.type = SCHEMA_COMM,
				.flags = SCHEMA_COMM,
				.name = "comm",
				.base = 16,
				.evt = QOS_CAT_CPBM_EVENT_ID,
				.capable = 1,
				.ctrl_suffix = "",
			},
			[SCHEMA_PRI] = {
				.type = SCHEMA_PRI,
				.flags = SCHEMA_PRI,
				.name = "caPrio",
				.base = 10,
				.evt = QOS_CAT_INTPRI_EVENT_ID,
				.ctrl_suffix = "PRI",
			},
			[SCHEMA_PBM] = {
				.type = SCHEMA_PBM,
				.flags = SCHEMA_COMM,
				.name = "caPbm",
				.base = 16,
				.evt = QOS_CAT_CPBM_EVENT_ID,
				.ctrl_suffix = "PBM",
			},
			[SCHEMA_MAX] = {
				.type = SCHEMA_MAX,
				.flags = SCHEMA_COMM,
				.name = "caMax",
				.base = 10,
				.evt = QOS_CAT_CMAX_EVENT_ID,
				.ctrl_suffix = "MAX",
			},
		},
	},
	/* L2 cache: same callbacks and feature layout as the L3 entry */
	[RDT_RESOURCE_L2] = {
		.msr_update     = common_wrmsr,
		.msr_read       = cache_rdmsr,
		.parse_ctrlval  = parse_cache,
		.format_str     = "%d=%0*x",
		.mon_read       = cache_rdmon,
		.mon_write      = common_wrmon,
		.fflags         = RFTYPE_RES_CACHE,
		.ctrl_features  = {
			[SCHEMA_COMM] = {
				.type = SCHEMA_COMM,
				.flags = SCHEMA_COMM,
				.name = "comm",
				.base = 16,
				.evt = QOS_CAT_CPBM_EVENT_ID,
				.capable = 1,
				.ctrl_suffix = "",
			},
			[SCHEMA_PRI] = {
				.type = SCHEMA_PRI,
				.flags = SCHEMA_PRI,
				.name = "caPrio",
				.base = 10,
				.evt = QOS_CAT_INTPRI_EVENT_ID,
				.ctrl_suffix = "PRI",
			},
			[SCHEMA_PBM] = {
				.type = SCHEMA_PBM,
				.flags = SCHEMA_COMM,
				.name = "caPbm",
				.base = 16,
				.evt = QOS_CAT_CPBM_EVENT_ID,
				.ctrl_suffix = "PBM",
			},
			[SCHEMA_MAX] = {
				.type = SCHEMA_MAX,
				.flags = SCHEMA_COMM,
				.name = "caMax",
				.base = 10,
				.evt = QOS_CAT_CMAX_EVENT_ID,
				.ctrl_suffix = "MAX",
			},
		},
	},
	/* Memory bandwidth: decimal schemata values, bandwidth callbacks */
	[RDT_RESOURCE_MC] = {
		.msr_update     = common_wrmsr,
		.msr_read       = mbw_rdmsr,
		.parse_ctrlval  = parse_bw,
		.format_str     = "%d=%0*d",
		.mon_read       = mbw_rdmon,
		.mon_write      = common_wrmon,
		.fflags         = RFTYPE_RES_MB,
		.ctrl_features  = {
			[SCHEMA_COMM] = {
				.type = SCHEMA_COMM,
				.flags = SCHEMA_COMM,
				.name = "comm",
				.base = 10,
				.evt = QOS_MBA_MAX_EVENT_ID,
				.capable = 1,
				.ctrl_suffix = "",
			},
			[SCHEMA_PRI] = {
				.type = SCHEMA_PRI,
				.flags = SCHEMA_PRI,
				.name = "mbPrio",
				.base = 10,
				.evt = QOS_MBA_INTPRI_EVENT_ID,
				.ctrl_suffix = "PRI",
			},
			[SCHEMA_HDL] = {
				.type = SCHEMA_HDL,
				.flags = SCHEMA_HDL,
				.name = "mbHdl",
				.base = 10,
				.evt = QOS_MBA_HDL_EVENT_ID,
				.ctrl_suffix = "HDL",
			},
			[SCHEMA_PBM] = {
				.type = SCHEMA_PBM,
				.flags = SCHEMA_COMM,
				.name = "mbPbm",
				.base = 16,
				.evt = QOS_MBA_PBM_EVENT_ID,
				.ctrl_suffix = "PBM",
			},
			[SCHEMA_MAX] = {
				.type = SCHEMA_MAX,
				.flags = SCHEMA_COMM,
				.name = "mbMax",
				.base = 10,
				.evt = QOS_MBA_MAX_EVENT_ID,
				.ctrl_suffix = "MAX",
			},
			[SCHEMA_MIN] = {
				.type = SCHEMA_MIN,
				.flags = SCHEMA_COMM,
				.name = "mbMin",
				.base = 10,
				.evt = QOS_MBA_MIN_EVENT_ID,
				.ctrl_suffix = "MIN",
			},
		},
	},
};

/*
 * Look up the raw resource descriptor for @level.
 *
 * Returns a pointer into raw_resctrl_resources_all[], or NULL when
 * @level is not below RDT_NUM_RESOURCES.
 */
struct raw_resctrl_resource *
mpam_get_raw_resctrl_resource(enum resctrl_resource_level level)
{
	return (level < RDT_NUM_RESOURCES) ?
		&raw_resctrl_resources_all[level] : NULL;
}

/*
 * Parse one cache schema value from @buf and stage it in @cfg.
 *
 * The text is converted with the numeric base of the @type feature and
 * must be below that feature's max_wd. A domain that already has a
 * staged value is rejected as a duplicate.
 *
 * Returns 0 on success, -EINVAL otherwise.
 */
static int
parse_cache(char *buf, struct resctrl_resource *r,
		struct resctrl_staged_config *cfg,
		enum resctrl_ctrl_type type)
{
	struct raw_resctrl_resource *rr = r->res;
	unsigned long val;

	if (cfg->have_new_ctrl) {
		rdt_last_cmd_printf("duplicate domain\n");
		return -EINVAL;
	}

	/* reject both unparsable text and out-of-range values */
	if (kstrtoul(buf, rr->ctrl_features[type].base, &val) ||
	    val >= rr->ctrl_features[type].max_wd)
		return -EINVAL;

	cfg->have_new_ctrl = true;
	cfg->new_ctrl[type] = val;

	return 0;
}

/*
 * Parse one memory bandwidth schema value from @buf and stage it in @cfg.
 *
 * MAX and PBM values are raised to r->mbw.min_bw when smaller and then
 * rounded up to the bandwidth granularity; MIN values are only rounded
 * up (so 0 stays allowed); all other control types are taken as parsed.
 * The final value must be below the feature's max_wd.
 *
 * Returns 0 on success, -EINVAL otherwise.
 */
static int
parse_bw(char *buf, struct resctrl_resource *r,
		struct resctrl_staged_config *cfg,
		enum resctrl_ctrl_type type)
{
	struct raw_resctrl_resource *rr = r->res;
	unsigned long val;

	if (cfg->have_new_ctrl) {
		rdt_last_cmd_printf("duplicate domain\n");
		return -EINVAL;
	}

	/* every control type is parsed with the feature's own base */
	if (kstrtoul(buf, rr->ctrl_features[type].base, &val))
		return -EINVAL;

	switch (rr->ctrl_features[type].evt) {
	case QOS_MBA_MAX_EVENT_ID:
	case QOS_MBA_PBM_EVENT_ID:
		if (val < r->mbw.min_bw)
			val = r->mbw.min_bw;
		val = roundup(val, r->mbw.bw_gran);
		break;
	case QOS_MBA_MIN_EVENT_ID:
		/* for mbw min feature, a setting of 0 is allowed */
		val = roundup(val, r->mbw.bw_gran);
		break;
	default:
		break;
	}

	if (val >= rr->ctrl_features[type].max_wd)
		return -EINVAL;

	cfg->have_new_ctrl = true;
	cfg->new_ctrl[type] = val;

	return 0;
}

/*
 * Apply the configuration of para->closid to domain @d of resource @r:
 * first update the component configuration, then ask the component to
 * apply it for that closid.
 */
static void
common_wrmsr(struct resctrl_resource *r, struct rdt_domain *d,
			struct msr_param *para)
{
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	struct sync_args args;

	mpam_resctrl_update_component_cfg(r, d, para->closid);

	/* the configuration has been replicated; now apply it */
	args.closid = *para->closid;
	mpam_component_config(dom->comp, &args);
}

/*
 * Read back the cache control value of type para->type for
 * para->closid from the component behind domain @d.
 */
static u64 cache_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
			struct msr_param *para)
{
	struct raw_resctrl_resource *rr = r->res;
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	struct sync_args args;
	u32 val;

	args.closid = *para->closid;
	/* the feature table tells which event id carries this control */
	args.eventid = rr->ctrl_features[para->type].evt;

	mpam_component_get_config(dom->comp, &args, &val);

	return val;
}

/*
 * Read back the memory bandwidth control value of type para->type for
 * para->closid. MAX, MIN and PBM values are rounded up to the
 * bandwidth granularity before being returned.
 */
static u64 mbw_rdmsr(struct resctrl_resource *r, struct rdt_domain *d,
			struct msr_param *para)
{
	struct raw_resctrl_resource *rr = r->res;
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	struct sync_args args;
	u32 val;

	args.closid = *para->closid;
	args.eventid = rr->ctrl_features[para->type].evt;

	mpam_component_get_config(dom->comp, &args, &val);

	switch (rr->ctrl_features[para->type].evt) {
	case QOS_MBA_MAX_EVENT_ID:
	case QOS_MBA_MIN_EVENT_ID:
	case QOS_MBA_PBM_EVENT_ID:
		val = roundup(val, r->mbw.bw_gran);
		break;
	default:
		/* other controls are reported as read */
		break;
	}

	return val;
}

/*
 * Read the cache occupancy monitor (QOS_L3_OCCUP_EVENT_ID) selected by
 * @md_priv on domain @d.
 *
 * @md_priv is decoded through union mon_data_bits into partid, monitor
 * and pmg; the request matches on pmg (match_pmg is set).
 *
 * The read is retried while mpam_component_mon() reports -EBUSY, for at
 * most 1*SEC_CONVERSION jiffies. Any other error triggers a WARN.
 *
 * Returns the monitor value, or 0 when no successful read filled it in
 * (timeout or error) instead of an uninitialised stack value.
 */
static u64 cache_rdmon(struct rdt_domain *d, void *md_priv)
{
	int err;
	u64 result = 0;
	union mon_data_bits md;
	struct sync_args args;
	struct mpam_resctrl_dom *dom;
	unsigned long timeout;

	md.priv = md_priv;

	/* monitoring only need reqpartid */
	args.closid.reqpartid = md.u.partid;
	args.mon = md.u.mon;
	args.pmg = md.u.pmg;
	args.match_pmg = true;
	args.eventid = QOS_L3_OCCUP_EVENT_ID;

	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);

	/*
	 * We should judge if return is OK, it is possibly affected
	 * by the NRDY bit.
	 */
	timeout = READ_ONCE(jiffies) + (1*SEC_CONVERSION);
	do {
		if (time_after(READ_ONCE(jiffies), timeout)) {
			err = -ETIMEDOUT;
			break;
		}
		err = mpam_component_mon(dom->comp, &args, &result);
		/* Currently just report it */
		WARN_ON(err && (err != -EBUSY));
	} while (err == -EBUSY);

	return result;
}
/*
 * Read the memory bandwidth monitor (QOS_L3_MBM_LOCAL_EVENT_ID)
 * selected by @md_priv on domain @d.
 *
 * @md_priv is decoded through union mon_data_bits into partid, monitor
 * and pmg; the request matches on pmg (match_pmg is set).
 *
 * The read is retried while mpam_component_mon() reports -EBUSY, for at
 * most 1*SEC_CONVERSION jiffies. Any other error triggers a WARN.
 *
 * Returns the monitor value, or 0 when no successful read filled it in
 * (timeout or error) instead of an uninitialised stack value.
 */
static u64 mbw_rdmon(struct rdt_domain *d, void *md_priv)
{
	int err;
	u64 result = 0;
	union mon_data_bits md;
	struct sync_args args;
	struct mpam_resctrl_dom *dom;
	unsigned long timeout;

	md.priv = md_priv;

	/* monitoring only need reqpartid */
	args.closid.reqpartid = md.u.partid;
	args.mon = md.u.mon;
	args.pmg = md.u.pmg;
	args.match_pmg = true;
	args.eventid = QOS_L3_MBM_LOCAL_EVENT_ID;

	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);

	/*
	 * We should judge if return is OK, it is possibly affected
	 * by the NRDY bit.
	 */
	timeout = READ_ONCE(jiffies) + (1*SEC_CONVERSION);
	do {
		if (time_after(READ_ONCE(jiffies), timeout)) {
			err = -ETIMEDOUT;
			break;
		}
		err = mpam_component_mon(dom->comp, &args, &result);
		/* Currently just report it */
		WARN_ON(err && (err != -EBUSY));
	} while (err == -EBUSY);

	return result;
}

/*
 * Configure the monitor selected by @md_priv on domain @d.
 *
 * The monitor is programmed by issuing a monitor request whose result
 * and return code are deliberately ignored. Always returns 0.
 *
 * NOTE(review): args.eventid is not assigned before the request is
 * issued - confirm mpam_component_mon() does not depend on it here.
 */
static int
common_wrmon(struct rdt_domain *d, void *md_priv)
{
	struct mpam_resctrl_dom *dom =
		container_of(d, struct mpam_resctrl_dom, resctrl_dom);
	union mon_data_bits md;
	struct sync_args args;
	u64 unused;

	md.priv = md_priv;

	/* monitoring only needs reqpartid, matched together with pmg */
	args.closid.reqpartid = md.u.partid;
	args.mon = md.u.mon;
	args.pmg = md.u.pmg;
	args.match_pmg = true;

	/* no need to check the outcome, only the configuration matters */
	mpam_component_mon(dom->comp, &args, &unused);

	return 0;
}

/*
 * Note that resctrl_id_init() should be called after
 * parse_resctrl_group_fs_options(), to guarantee that the
 * resctrl_cdp_enabled state is already in effect.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set "current->closid" to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */

static int num_intpartid, num_reqpartid;
static unsigned long *intpartid_free_map;

/*
 * Work out num_reqpartid and num_intpartid.
 *
 * num_reqpartid is the system wide number of partids. num_intpartid,
 * which is what resctrl sees as closid count, is that number capped by
 * RESCTRL_MAX_CLOSID and by the smallest non-zero num_intpartid of any
 * exported resource.
 */
static void mpam_resctrl_closid_collect(void)
{
	struct mpam_resctrl_res *res;
	struct raw_resctrl_resource *rr;

	/*
	 * num_reqpartid refers to the maximum partid number
	 * that system width provides.
	 */
	num_reqpartid = mpam_sysprops_num_partid();
	/*
	 * we make intpartid the closid, this is because when
	 * system platform supports intpartid narrowing, this
	 * intpartid concept represents the resctrl maximum
	 * group we can create, so it should be less than
	 * maximum reqpartid number and maximum closid number
	 * allowed by resctrl sysfs provided by @Intel-RDT.
	 */
	num_intpartid = min(num_reqpartid, RESCTRL_MAX_CLOSID);

	/*
	 * as we know we make intpartid the closid given to
	 * resctrl, we should know if any resource supports
	 * intpartid narrowing.
	 */
	for_each_supported_resctrl_exports(res) {
		rr = res->resctrl_res.res;
		/* a resource reporting 0 puts no limit on the count */
		if (!rr->num_intpartid)
			continue;
		num_intpartid = min(num_intpartid, (int)rr->num_intpartid);
	}
}

/* Number of closids, or 0 while the closid bitmap has not been set up. */
static u32 get_nr_closid(void)
{
	return intpartid_free_map ? num_intpartid : 0;
}

/*
 * (Re)create the free-closid bitmap.
 *
 * The closid count is collected, rounded down to a multiple of the
 * allocation width "times" reported by hw_alloc_times_validate(), and a
 * bitmap with that many bits is allocated with every bit set (free).
 * The first "times" bits are then cleared to reserve the default group.
 *
 * num_intpartid is trimmed to the size of the bitmap so that later scans
 * in closid_alloc() never look past the allocated bits.
 *
 * Returns 0 on success, -EINVAL if no closid is usable, -ENOMEM on
 * allocation failure.
 */
int closid_bitmap_init(void)
{
	int pos;
	u32 times, flag;
	u32 bits_num;

	mpam_resctrl_closid_collect();
	bits_num = num_intpartid;
	hw_alloc_times_validate(times, flag);
	bits_num = rounddown(bits_num, times);
	if (!bits_num)
		return -EINVAL;

	/* drop a map left by an earlier init; freeing NULL is a no-op */
	bitmap_free(intpartid_free_map);

	intpartid_free_map = bitmap_zalloc(bits_num, GFP_KERNEL);
	if (!intpartid_free_map)
		return -ENOMEM;

	/* keep the advertised closid count in step with the bitmap size */
	num_intpartid = bits_num;

	bitmap_set(intpartid_free_map, 0, bits_num);

	/* CLOSID 0 is always reserved for the default group */
	pos = find_first_bit(intpartid_free_map, bits_num);
	bitmap_clear(intpartid_free_map, pos, times);

	return 0;
}

/**
 * struct rmid_transform - Matrix for transforming rmid to partid and pmg
 * @rows:           Number of bits in each remap_body[] bitmap
 * @cols:           Number of bitmaps (one per partid column)
 * @nr_usage:       Number of rmids we have
 * @step_size:      Step size used when traversing the points of the matrix
 * @step_cnt:       Indicates how many times to traverse (e.g. if cdp,
 *                  step_cnt=2)
 * @remap_body:     Array of bitmap pointers, one entry per column
 */
struct rmid_transform {
	u32 rows;
	u32 cols;
	u32 nr_usage;
	int step_size;
	int step_cnt;
	unsigned long **remap_body;
};
static struct rmid_transform rmid_remap_matrix;
DEFINE_STATIC_KEY_FALSE(rmid_remap_enable_key);

/* Number of rmids, or 0 while the rmid remap matrix is not enabled. */
static u32 get_nr_rmids(void)
{
	if (static_branch_likely(&rmid_remap_enable_key))
		return rmid_remap_matrix.nr_usage;

	return 0;
}

/*
 * a rmid remap matrix is delivered for transforming partid pmg to rmid,
 * this matrix is organized like this:
 *
 *                  [bitmap entry indexed by partid]
 *
 *                  [0]   [1]  [2]  [3]   [4]  [5]
 *             occ   1     0    0    1     1    1
 *      bitmap[:0]   1     0    0    1     1    1
 *      bitmap[:1]   1     1    1    1     1    1
 *      bitmap[:2]   1     1    1    1     1    1
 *     [pos is pmg]
 *
 * Calculate rmid = partid + NR_partid * pmg
 *
 * occ records whether this bitmap is already in use by a partid, because
 * a given partid must not be paired with a duplicated pmg for monitoring.
 * This design saves a lot of space and also reduces the time complexity
 * of allocating and freeing an rmid from O(NR_partid) * O(NR_pmg) to
 * O(NR_partid) + O(log(NR_pmg)) compared with using a list.
 */
/*
 * (Re)create the rmid remap matrix.
 *
 * @rows: number of pmg rows; one extra row is added for the occ bit
 * @cols: number of partid columns; rounded down to a multiple of the
 *        allocation width "times" from hw_alloc_times_validate()
 *
 * All arguments are validated before the current matrix is touched. A
 * previously built matrix is released using the column count it was
 * created with, then a new one is allocated with every bit set. Both the
 * allocation and every loop use the rounded column count, so no entry
 * beyond the allocated array is ever accessed.
 *
 * On success the rmid_remap_enable_key static key is enabled. On any
 * allocation failure the partially built matrix is freed, remap_body is
 * left NULL and the static key is disabled.
 *
 * Returns 0 on success, -EINVAL for unusable dimensions, -ENOMEM on
 * allocation failure.
 */
static int set_rmid_remap_matrix(u32 rows, u32 cols)
{
	u32 times, flag;
	u32 new_cols, col;
	int ret;

	/*
	 * cols stands for partid, so if cdp enabled we must
	 * keep at least two partid for LxCODE and LxDATA
	 * respectively once time.
	 */
	hw_alloc_times_validate(times, flag);
	new_cols = rounddown(cols, times);

	if (rows == 0 || cols == 0 || times > new_cols)
		return -EINVAL;

	/*
	 * free history pointer for matrix recreation; at this point
	 * rmid_remap_matrix.cols still is the size of the old body.
	 */
	if (rmid_remap_matrix.remap_body) {
		for (col = 0; col < rmid_remap_matrix.cols; col++)
			bitmap_free(rmid_remap_matrix.remap_body[col]);
		kfree(rmid_remap_matrix.remap_body);
		rmid_remap_matrix.remap_body = NULL;
	}

	rmid_remap_matrix.cols = new_cols;
	rmid_remap_matrix.step_cnt = times;
	/*
	 * if only pmg(Performance Monitor Group)
	 * work on the monitor, step_size must be
	 * set to maximum number of columns,
	 * otherwise set it to 1, such as kunpeng
	 * 920 does.
	 */
	rmid_remap_matrix.step_size = 1;

	/*
	 * first row of rmid remap matrix is used for indicating
	 * if remap bitmap is occupied by a col index.
	 */
	rmid_remap_matrix.rows = rows + 1;

	/* rmid = partid + cols * pmg, so only usable columns count */
	rmid_remap_matrix.nr_usage = rows * new_cols;

	rmid_remap_matrix.remap_body = kcalloc(new_cols,
			sizeof(*rmid_remap_matrix.remap_body), GFP_KERNEL);
	if (!rmid_remap_matrix.remap_body) {
		ret = -ENOMEM;
		goto disable;
	}

	for (col = 0; col < new_cols; col++) {
		rmid_remap_matrix.remap_body[col] =
				bitmap_zalloc(rmid_remap_matrix.rows,
				GFP_KERNEL);
		if (!rmid_remap_matrix.remap_body[col]) {
			ret = -ENOMEM;
			goto clean;
		}

		/* every bit set: column not occupied, all pmgs free */
		bitmap_set(rmid_remap_matrix.remap_body[col],
				0, rmid_remap_matrix.rows);
	}

	/* make column entry of rmid matrix visible */
	static_branch_enable_cpuslocked(&rmid_remap_enable_key);

	return 0;
clean:
	/* entries not reached yet are NULL thanks to kcalloc() */
	for (col = 0; col < new_cols; col++)
		bitmap_free(rmid_remap_matrix.remap_body[col]);
	kfree(rmid_remap_matrix.remap_body);
	rmid_remap_matrix.remap_body = NULL;
disable:
	/* if recreation failed, cannot use rmid remap matrix */
	static_branch_disable_cpuslocked(&rmid_remap_enable_key);

	return ret;
}

static u32 probe_rmid_remap_matrix_cols(void)
{
	return (u32)num_reqpartid;
}

static u32 probe_rmid_remap_matrix_rows(void)
{
	return (u32)mpam_sysprops_num_pmg();
}

/*
 * Return the address of the bitmap pointer for column @col, or NULL
 * when the remap matrix is not enabled or @col is out of range.
 */
static inline unsigned long **__rmid_remap_bmp(u32 col)
{
	if (static_branch_likely(&rmid_remap_enable_key) &&
	    col < rmid_remap_matrix.cols)
		return &rmid_remap_matrix.remap_body[col];

	return NULL;
}

/*
 *  these macros defines how can we traverse rmid remap matrix, there are
 *  three scenarios:
 *
 *  (1) step_size is default set to 1, if only PMG(NR_PMG=4) works, makes
 *      it equals to number of columns, step_cnt means how many times are
 *      allocated and released each time, at this time rmid remap matrix
 *      looks like:
 *
 *        ^
 *        |
 *         ------column------>
 *
 *       RMID  0   1   2   3   (step_size=1)
 *             `---'
 *                `--> (step_cnt=2 if cdp enabled)
 *
 *       RMID  0   1   2   3   (step_size=1)
 *             `--
 *                `--> (step_cnt=1 if cdp disabled)
 *
 *  (2) if PARTID(NR_PARTID=4) and PMG(NR_PMG=4) works together, at this
 *      time rmid remap matrix looks like:
 *
 *       ------------row------------>
 *      |
 *      |  RMID  0   1   2   3   (step_size=1)
 *      |        `---'
 *      |           `--> (step_cnt=2 if cdp enabled)
 *      |        4   5   6   7
 *      |        8   9   10  11
 *      v        12  13  14  15
 *
 *  (3) step_size not equal to 1, cross-line traversal, but this scenario
 *      did not happen yet.
 */

/* Start a traversal at column @from with the wrap counter y at 0. */
#define __xy_initialize(x, y, from)           		\
	(x = from, y = 0)
/* The traversal is over once y has reached the number of columns. */
#define __xy_overflow(x, y)				\
	(y >= rmid_remap_matrix.cols)
/* Advance x to the next column, wrapping around at the last one. */
#define __x_forward(x)					\
	(x = (x + 1) % rmid_remap_matrix.cols)
/* Bump y each time x is 0, i.e. after x has wrapped around. */
#define __y_forward(x, y)				\
	(y += ((x) ? 0 : 1))

/* Like __xy_initialize(), and start counting steps at 1. */
#define __step_xy_initialize(step, x, y, from)		\
	(x = from, step = 1, y = 0)
/* True when @from is a multiple of step_size * step_cnt. */
#define __step_align(from)				\
	(!(from % (rmid_remap_matrix.step_size *	\
		rmid_remap_matrix.step_cnt)))
/*
 * True when the traversal overflowed or more than step_cnt steps were
 * taken. Note: x and y are not parameters here, they are picked up by
 * name from the place of expansion, so users of the step iterator must
 * call their variables exactly "x" and "y".
 */
#define __step_overflow(step)				\
	(__xy_overflow(x, y) ||				\
		(step > rmid_remap_matrix.step_cnt))
#define __step_x_forward(x)				\
	__x_forward(x)
/* Count one more step whenever x is a multiple of step_size. */
#define __step_forward(step, x)				\
	(step += ((x % rmid_remap_matrix.step_size) ? 0 : 1))
#define __step_y_forward(x, y)				\
	__y_forward(x, y)

/*
 * Visit the column entries of one allocation step starting at @from.
 * Nothing is visited unless @from is step aligned. The loop body only
 * runs for entries whose pointer and bitmap are both non-NULL; other
 * entries trigger a one-time warning and are skipped.
 */
#define for_each_rmid_transform_point_step_from(p_entry, step, x, y, from)	\
	for (__step_xy_initialize(step, x, y, from),				\
		(p_entry) = __rmid_remap_bmp((from));				\
		__step_align(from) && !__step_overflow(step);			\
		__step_x_forward(x),						\
		__step_forward(step, x),					\
		__step_y_forward(x, y),						\
		(p_entry) = __rmid_remap_bmp(x))				\
			if (unlikely(((p_entry) == NULL) ||			\
				(*p_entry) == NULL))				\
				WARN_ON_ONCE(1);				\
			else

/*
 * Visit column entries starting at @from until __xy_overflow() holds.
 * The loop body only runs for entries whose pointer and bitmap are both
 * non-NULL; other entries trigger a one-time warning and are skipped.
 */
#define for_each_rmid_transform_point_from(p_entry, x, y, from)			\
	for (__xy_initialize(x, y, from),					\
		(p_entry) = __rmid_remap_bmp((from));				\
		!__xy_overflow(x, y);						\
		__x_forward(x),							\
		__y_forward(x, y),						\
		(p_entry) = __rmid_remap_bmp(x))				\
			if (unlikely(((p_entry) == NULL) ||			\
				(*p_entry) == NULL))				\
				WARN_ON_ONCE(1);				\
			else

/* Mark a column bitmap as occupied: the occ flag is bit 0, cleared. */
static void set_rmid_remap_bmp_occ(unsigned long *bmp)
{
	const int occ_bit = 0;

	clear_bit(occ_bit, bmp);
}

/* Mark a column bitmap as not occupied again by setting bit 0. */
static void unset_rmid_remap_bmp_occ(unsigned long *bmp)
{
	const int occ_bit = 0;

	set_bit(occ_bit, bmp);
}

/*
 * Return 1 when the bit for index @b (stored at position @b + 1, after
 * the occ bit) is clear in @bmp, 0 when it is set.
 */
static int is_rmid_remap_bmp_bdr_set(unsigned long *bmp, int b)
{
	if (test_bit(b + 1, bmp) == 0)
		return 1;

	return 0;
}

/* Set the bit for index @b; bit 0 is the occ flag, hence the offset. */
static void rmid_remap_bmp_bdr_set(unsigned long *bmp, int b)
{
	const int pos = b + 1;

	set_bit(pos, bmp);
}

/* Clear the bit for index @b; bit 0 is the occ flag, hence the offset. */
static void rmid_remap_bmp_bdr_clear(unsigned long *bmp, int b)
{
	const int pos = b + 1;

	clear_bit(pos, bmp);
}

/*
 * Return 1 when @bmp is occupied, i.e. its first set bit is not bit 0,
 * and 0 otherwise.
 */
static int is_rmid_remap_bmp_occ(unsigned long *bmp)
{
	if (find_first_bit(bmp, rmid_remap_matrix.rows) == 0)
		return 0;

	return 1;
}

/*
 * Return non-zero when every bit of @bmp is set, or when @bmp is
 * occupied and all of its remaining bits are set.
 */
static int is_rmid_remap_bmp_full(unsigned long *bmp)
{
	if (is_rmid_remap_bmp_occ(bmp) &&
	    bitmap_weight(bmp, rmid_remap_matrix.rows) ==
	    (rmid_remap_matrix.rows-1))
		return 1;

	return bitmap_full(bmp, rmid_remap_matrix.rows) ? 1 : 0;
}

/*
 * Pick the column an allocation step should start from.
 *
 * Returns 0 when step_size equals the number of columns. When @partid
 * names an existing column, that column is returned if it is step
 * aligned and not occupied, -ENOSPC otherwise. When @partid has no
 * column, the first step aligned, non-occupied column is returned, or
 * -ENOSPC if there is none.
 */
static int rmid_remap_bmp_find_step_entry(int partid)
{
	int x, y;
	unsigned long **entry;

	if (rmid_remap_matrix.step_size ==
		rmid_remap_matrix.cols)
		return 0;

	/* step entry should be non-occupied and aligned */
	entry = __rmid_remap_bmp(partid);
	if (entry) {
		if (is_rmid_remap_bmp_occ(*entry) || !__step_align(partid))
			return -ENOSPC;
		return partid;
	}

	for_each_rmid_transform_point_from(entry, x, y, 0) {
		/* only step aligned positions, so no partid is wasted */
		if (!__step_align(x))
			continue;
		if (!is_rmid_remap_bmp_occ(*entry))
			return x;
	}

	return -ENOSPC;
}

/*
 * Take the first set bit of @bmp: clear it and return its position
 * minus one (bit 0 is the occ flag). Returns -ENOSPC when no bit is set.
 */
static int rmid_remap_bmp_alloc_pmg(unsigned long *bmp)
{
	int bit = find_first_bit(bmp, rmid_remap_matrix.rows);

	if (bit != rmid_remap_matrix.rows) {
		clear_bit(bit, bmp);
		return bit - 1;
	}

	return -ENOSPC;
}

/*
 * Build the rmid remap matrix from the probed dimensions, reserve the
 * first allocation step for the root group and set up the rmid monitor
 * pointers. Returns 0 on success or the error of the failing helper.
 */
static int rmid_remap_matrix_init(void)
{
	/* x and y must keep these names: the step iterator relies on them */
	int x, y, step, ret;
	u32 nr_cols, nr_rows;
	unsigned long **bmp;

	nr_cols = probe_rmid_remap_matrix_cols();
	nr_rows = probe_rmid_remap_matrix_rows();

	ret = set_rmid_remap_matrix(nr_rows, nr_cols);
	if (ret)
		return ret;

	/*
	 * if CDP disabled, drop partid = 0, pmg = 0
	 * from bitmap for root resctrl group reserving
	 * default rmid, otherwise drop partid = 0 and
	 * partid = 1 for LxCACHE, LxDATA reservation.
	 */
	for_each_rmid_transform_point_step_from(bmp, step, x, y, 0) {
		set_rmid_remap_bmp_occ(*bmp);
		rmid_remap_bmp_alloc_pmg(*bmp);
	}

	return rmid_mon_ptrs_init(rmid_remap_matrix.nr_usage);
}

/*
 * Initialise the closid bitmap first and then the rmid remap matrix.
 * Returns 0 on success or the first error encountered.
 */
int resctrl_id_init(void)
{
	int ret = closid_bitmap_init();

	if (!ret)
		ret = rmid_remap_matrix_init();

	return ret;
}

/* Return 1 when @rmid, taken as unsigned, is below nr_usage, else 0. */
static int is_rmid_valid(int rmid)
{
	if ((u32)rmid >= rmid_remap_matrix.nr_usage)
		return 0;

	return 1;
}

/* Compose an rmid: rmid = partid + number of columns * pmg. */
static int to_rmid(int partid, int pmg)
{
	return (rmid_remap_matrix.cols * pmg) + partid;
}

/*
 * Split @rmid back into its column (partid) and row (pmg). Either
 * output pointer may be NULL when the caller does not need that part.
 *
 * Returns 0 on success, -EINVAL when @rmid is not valid.
 */
static int rmid_to_partid_pmg(int rmid, int *partid, int *pmg)
{
	if (!is_rmid_valid(rmid))
		return -EINVAL;

	if (partid)
		*partid = rmid % rmid_remap_matrix.cols;
	if (pmg)
		*pmg = rmid / rmid_remap_matrix.cols;

	return 0;
}

/*
 * Allocate an rmid.
 *
 * @partid: column to start from; rmid_remap_bmp_find_step_entry()
 *          decides which column is actually used
 * @pmg:    a pmg to claim when >= 0, or a negative value to take the
 *          first free pmg of each visited column
 *
 * Every column of the allocation step is marked occupied and has a pmg
 * bit cleared. The rmid is then composed from the chosen partid and pmg
 * and associated with a monitor.
 *
 * Returns the rmid on success or a negative error code. Note that the
 * error exits taken inside the loop do not restore bits that were
 * already changed in the bitmaps.
 */
static int __rmid_alloc(int partid, int pmg)
{
	/* x and y must keep these names: the step iterator relies on them */
	int x, y, step, ret, rmid;
	bool checkpmg = false;
	unsigned long **bmp;

	/* a non-negative pmg means the caller asks for that very pmg */
	if (pmg >= 0)
		checkpmg = true;

	/* traverse from first non-occupied and step-aligned entry */
	ret = rmid_remap_bmp_find_step_entry(partid);
	if (ret < 0)
		goto out;
	partid = ret;

	for_each_rmid_transform_point_step_from(bmp, step, x, y, partid) {
		set_rmid_remap_bmp_occ(*bmp);

		/* checking if the given pmg is available */
		if (checkpmg) {
			/*
			 * it can only happened in step_size aligned
			 * position, so it does not exist pmgs cleared
			 * before.
			 */
			/* the helper returns 1 when the bit is already clear */
			if (is_rmid_remap_bmp_bdr_set(*bmp, pmg + y)) {
				ret = -EEXIST;
				goto out;
			}
			rmid_remap_bmp_bdr_clear(*bmp, pmg + y);
			continue;
		}

		/* alloc available pmg */
		ret = rmid_remap_bmp_alloc_pmg(*bmp);
		if (ret < 0)
			goto out;
		/* always return first pmg */
		if (pmg < 0)
			pmg = ret;
	}

	rmid = to_rmid(partid, pmg);
	if (!is_rmid_valid(rmid)) {
		ret = -ENOSPC;
		goto out;
	}
	ret = assoc_rmid_with_mon(rmid);
	if (ret) {
		/* give the bitmap bits back when no monitor could be bound */
		rmid_free(rmid);
		goto out;
	}

	return rmid;
out:
	return ret;
}

/*
 * Allocate an rmid for @partid, letting the allocator choose the pmg.
 * Returns the rmid or a negative error code.
 */
int rmid_alloc(int partid)
{
	const int any_pmg = -1;

	return __rmid_alloc(partid, any_pmg);
}

/*
 * Release @rmid: set its pmg bit again in every column of its
 * allocation step, drop the occupied mark of columns that became full,
 * and detach the rmid from its monitor. Invalid rmids are ignored.
 */
void rmid_free(int rmid)
{
	/* x and y must keep these names: the step iterator relies on them */
	int x, y, step, partid, pmg;
	unsigned long **bmp;

	if (rmid_to_partid_pmg(rmid, &partid, &pmg) != 0)
		return;

	for_each_rmid_transform_point_step_from(bmp, step, x, y, partid) {
		rmid_remap_bmp_bdr_set(*bmp, pmg + y);
		if (!is_rmid_remap_bmp_full(*bmp))
			continue;
		unset_rmid_remap_bmp_occ(*bmp);
	}

	deassoc_rmid_with_mon(rmid);
}

/*
 * Exported wrapper around rmid_to_partid_pmg(): split @rmid into
 * @partid and @pmg. Returns 0 on success, -EINVAL for an invalid rmid.
 */
int mpam_rmid_to_partid_pmg(int rmid, int *partid, int *pmg)
{
	int ret = rmid_to_partid_pmg(rmid, partid, pmg);

	return ret;
}
EXPORT_SYMBOL(mpam_rmid_to_partid_pmg);

/*
 * Allocate a closid from the free map.
 *
 * "times" ids are taken in one go (two when cdp is enabled) and the
 * first one is returned. Returns -ENOSPC when no id is free.
 */
int closid_alloc(void)
{
	u32 times, flag;
	int first_free;

	hw_alloc_times_validate(times, flag);

	first_free = find_first_bit(intpartid_free_map, num_intpartid);
	if (first_free != num_intpartid) {
		bitmap_clear(intpartid_free_map, first_free, times);
		return first_free;
	}

	return -ENOSPC;
}

/*
 * Hand @closid back to the free map, together with the following ids
 * that were taken with it ("times" bits in total).
 */
void closid_free(int closid)
{
	u32 times, flag;

	hw_alloc_times_validate(times, flag);

	/* a set bit means the id is free again */
	bitmap_set(intpartid_free_map, closid, times);
}

/*
 * Grow max_name_width and max_data_width so that they cover the longest
 * resource name and the widest data field of all exported resources.
 */
static void mpam_init_padding(void)
{
	struct mpam_resctrl_res *res;
	struct resctrl_resource *r;
	struct raw_resctrl_resource *rr;
	int width;

	for_each_supported_resctrl_exports(res) {
		r = &res->resctrl_res;
		rr = r->res;

		width = strlen(r->name);
		if (max_name_width < width)
			max_name_width = width;

		/* without a raw resource there is no data width to look at */
		if (!rr)
			continue;

		width = rr->data_width;
		if (max_data_width < width)
			max_data_width = width;
	}
}

/*
 * Enable the resctrl static keys that match the detected capabilities:
 * the alloc key, the mon key, and the global key when either is set.
 */
void post_resctrl_mount(void)
{
	/* nothing to enable when neither capability is present */
	if (!rdt_alloc_capable && !rdt_mon_capable)
		return;

	if (rdt_alloc_capable)
		static_branch_enable_cpuslocked(&resctrl_alloc_enable_key);
	if (rdt_mon_capable)
		static_branch_enable_cpuslocked(&resctrl_mon_enable_key);

	static_branch_enable_cpuslocked(&resctrl_enable_key);
}

/* Intentionally empty: this implementation has nothing to release. */
void release_rdtgroupfs_options(void)
{
}

void disable_cdp(void)
{
	struct mpam_resctrl_res *res;
	struct resctrl_resource *r;

	for_each_supported_resctrl_exports(res) {
		r = &res->resctrl_res;
		r->cdp_enable = false;
	}

	resctrl_cdp_enabled = false;
}

/*
 * Enable CDP on the resource at @level and set the global CDP flag.
 * Returns 0 on success, -EINVAL when the resource does not exist or is
 * not CDP capable.
 */
static int try_to_enable_cdp(enum resctrl_resource_level level)
{
	struct resctrl_resource *r = mpam_resctrl_get_resource(level);

	if (r && r->cdp_capable) {
		r->cdp_enable = true;
		resctrl_cdp_enabled = true;
		return 0;
	}

	return -EINVAL;
}

/* Enable CDP for the L3 resource; see try_to_enable_cdp() for errors. */
int cdpl3_enable(void)
{
	int ret = try_to_enable_cdp(RDT_RESOURCE_L3);

	return ret;
}

/* Enable CDP for the L2 resource; see try_to_enable_cdp() for errors. */
int cdpl2_enable(void)
{
	int ret = try_to_enable_cdp(RDT_RESOURCE_L2);

	return ret;
}

/* Enable the SCHEMA_COMM control feature on every exported resource. */
void basic_ctrl_enable(void)
{
	struct mpam_resctrl_res *res;
	struct raw_resctrl_resource *raw;

	for_each_supported_resctrl_exports(res) {
		raw = res->resctrl_res.res;
		/* SCHEMA_COMM is the minimum every resource supports */
		raw->ctrl_features[SCHEMA_COMM].enabled = true;
	}
}

int extend_ctrl_enable(char *tok)
{
	bool match = false;
	struct resctrl_resource *r;
	struct raw_resctrl_resource *rr;
	struct mpam_resctrl_res *res;
	struct resctrl_ctrl_feature *feature;
	enum resctrl_ctrl_type type;

	for_each_supported_resctrl_exports(res) {
		r = &res->resctrl_res;
		if (!r->alloc_capable)
			continue;
		rr = r->res;
		for_each_extend_ctrl_type(type) {
			feature = &rr->ctrl_features[type];
			if (!feature->capable || !feature->name)
				continue;
			if (strcmp(feature->name, tok))
				continue;

			rr->ctrl_features[type].enabled = true;
			/*
			 * If we chose to enable a feature also embraces
			 * SCHEMA_COMM, SCHEMA_COMM will not be selected.
			 */
			if (feature->flags == SCHEMA_COMM)
				rr->ctrl_features[SCHEMA_COMM].enabled = false;;
			match = true;
		}
	}

	if (!match)
		return -EINVAL;

	return 0;
}

/* Disable every extended control feature on every exported resource. */
void extend_ctrl_disable(void)
{
	struct raw_resctrl_resource *raw;
	struct mpam_resctrl_res *res;
	enum resctrl_ctrl_type type;

	for_each_supported_resctrl_exports(res) {
		raw = res->resctrl_res.res;
		for_each_extend_ctrl_type(type)
			raw->ctrl_features[type].enabled = false;
	}
}


/*
 * This is safe against intel_resctrl_sched_in() called from __switch_to()
 * because __switch_to() is executed with interrupts disabled. A local call
 * from update_closid_rmid() is proteced against __switch_to() because
 * preemption is disabled.
 */
void update_cpu_closid_rmid(void *info)
{
	struct rdtgroup *r = info;

	if (r) {
		this_cpu_write(pqr_state.default_closid, resctrl_navie_closid(r->closid));
		this_cpu_write(pqr_state.default_rmid, resctrl_navie_rmid(r->mon.rmid));
	}

	/*
	 * We cannot unconditionally write the MSR because the current
	 * executing task might have its own closid selected. Just reuse
	 * the context switch code.
	 */
	mpam_sched_in();
}

/*
 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
 *
 * Per task closids/rmids must have been set up before calling this
 * function.
 */
void
update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
	int this_cpu;

	/* get_cpu() keeps us on this cpu until put_cpu() */
	this_cpu = get_cpu();

	/* the local cpu is handled directly, the rest by cross call */
	if (cpumask_test_cpu(this_cpu, cpu_mask))
		update_cpu_closid_rmid(r);
	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);

	put_cpu();
}

/*
 * Task work item queued by __resctrl_group_move_task() and run by
 * move_myself() in the context of the task being moved.
 */
struct task_move_callback {
	/* task_work hook; move_myself() recovers this struct from it */
	struct callback_head	work;
	/* group the task is moved to; pinned through its waitcount */
	struct rdtgroup		*rdtgrp;
};

/*
 * Task-work handler queued by __resctrl_group_move_task(). It drops the
 * waitcount reference taken there, re-runs the sched-in path for the
 * current task and frees the callback object.
 */
static void move_myself(struct callback_head *head)
{
	struct task_move_callback *cb =
		container_of(head, struct task_move_callback, work);
	struct rdtgroup *grp = cb->rdtgrp;

	/*
	 * If the resource group was deleted before this task work ran and
	 * this was the last waiter, fall back to closid/rmid 0 and free the
	 * resource group here.
	 */
	if (atomic_dec_and_test(&grp->waitcount) &&
	    (grp->flags & RDT_DELETED)) {
		current->closid = 0;
		current->rmid = 0;
		kfree(grp);
	}

	/* Make the (possibly new) closid/rmid of current take effect. */
	preempt_disable();
	mpam_sched_in();
	preempt_enable();

	kfree(cb);
}

int __resctrl_group_move_task(struct task_struct *tsk,
				struct rdtgroup *rdtgrp)
{
	struct task_move_callback *callback;
	int ret;

	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
	if (!callback)
		return -ENOMEM;
	callback->work.func = move_myself;
	callback->rdtgrp = rdtgrp;

	/*
	 * Take a refcount, so rdtgrp cannot be freed before the
	 * callback has been invoked.
	 */
	atomic_inc(&rdtgrp->waitcount);
	ret = task_work_add(tsk, &callback->work, true);
	if (ret) {
		/*
		 * Task is exiting. Drop the refcount and free the callback.
		 * No need to check the refcount as the group cannot be
		 * deleted before the write function unlocks resctrl_group_mutex.
		 */
		atomic_dec(&rdtgrp->waitcount);
		kfree(callback);
		rdt_last_cmd_puts("task exited\n");
	} else {
		/*
		 * For ctrl_mon groups move both closid and rmid.
		 * For monitor groups, can move the tasks only from
		 * their parent CTRL group.
		 */
		if (rdtgrp->type == RDTCTRL_GROUP) {
			tsk->closid = resctrl_navie_closid(rdtgrp->closid);
			tsk->rmid = resctrl_navie_rmid(rdtgrp->mon.rmid);
		} else if (rdtgrp->type == RDTMON_GROUP) {
			if (rdtgrp->mon.parent->closid.intpartid == tsk->closid) {
				tsk->closid = resctrl_navie_closid(rdtgrp->closid);
				tsk->rmid = resctrl_navie_rmid(rdtgrp->mon.rmid);
			} else {
				rdt_last_cmd_puts("Can't move task to different control group\n");
				ret = -EINVAL;
			}
		}
	}
	return ret;
}

/*
 * kernfs seq_show entry point: dispatch to the rftype's own seq_show
 * handler when it has one; otherwise succeed without output.
 */
static int resctrl_group_seqfile_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	struct rftype *rft = of->kn->priv;

	if (!rft->seq_show)
		return 0;

	return rft->seq_show(of, m, arg);
}

/*
 * kernfs write entry point: dispatch to the rftype's own write handler,
 * or fail with -EINVAL when the file has none.
 */
static ssize_t resctrl_group_file_write(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct rftype *rft = of->kn->priv;

	if (!rft->write)
		return -EINVAL;

	return rft->write(of, buf, nbytes, off);
}

/*
 * kernfs operations shared by the resctrl files registered in this file;
 * reads and writes are forwarded to the per-file rftype handlers.
 */
struct kernfs_ops resctrl_group_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= resctrl_group_file_write,
	.seq_show		= resctrl_group_seqfile_show,
};

/* True when the open file's rftype carries RFTYPE_FLAGS_CPUS_LIST. */
static bool is_cpu_list(struct kernfs_open_file *of)
{
	struct rftype *type = of->kn->priv;

	return (type->flags & RFTYPE_FLAGS_CPUS_LIST) != 0;
}

/*
 * Print the group's CPU mask, as a list for list-style files and as a
 * bitmap otherwise. Returns -ENOENT if the group is no longer live.
 */
static int resctrl_group_cpus_show(struct kernfs_open_file *of,
			      struct seq_file *s, void *v)
{
	struct rdtgroup *grp;
	int ret = 0;

	grp = resctrl_group_kn_lock_live(of->kn);

	if (!grp)
		ret = -ENOENT;
	else if (is_cpu_list(of))
		seq_printf(s, "%*pbl\n", cpumask_pr_args(&grp->cpu_mask));
	else
		seq_printf(s, "%*pb\n", cpumask_pr_args(&grp->cpu_mask));

	/* The unlock is issued on both paths, as after a failed lock_live. */
	resctrl_group_kn_unlock(of->kn);

	return ret;
}

static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
{
	struct rdtgroup *crgrp;

	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
	/* update the child mon group masks as well*/
	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
}

/*
 * Apply @newmask as the CPU mask of control group @rdtgrp.
 *
 * CPUs dropped from the group go to the default group; CPUs added are
 * taken away from whichever other group owned them. @tmpmask and
 * @tmpmask1 are caller-provided scratch masks.
 *
 * Returns 0 on success, -EINVAL when CPUs would be dropped from the
 * default group.
 */
int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
{
	struct rdtgroup *other, *child;

	/* Step 1: CPUs that leave this group. */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (cpumask_weight(tmpmask)) {
		/* The default group has nowhere to hand its CPUs to. */
		if (rdtgrp == &resctrl_group_default) {
			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
			return -EINVAL;
		}

		/* Dropped CPUs fall back to the default group. */
		cpumask_or(&resctrl_group_default.cpu_mask,
			   &resctrl_group_default.cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, &resctrl_group_default);
	}

	/*
	 * Step 2: CPUs that join this group. Strip them from the previous
	 * owner (and that owner's child groups), then update the per-cpu
	 * closid/rmid.
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (cpumask_weight(tmpmask)) {
		list_for_each_entry(other, &resctrl_all_groups,
				    resctrl_group_list) {
			if (other == rdtgrp)
				continue;
			cpumask_and(tmpmask1, &other->cpu_mask, tmpmask);
			if (cpumask_weight(tmpmask1))
				cpumask_rdtgrp_clear(other, tmpmask1);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Step 3: record the new mask on this group. */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	/*
	 * Step 4: the parent mask changed, so child mon group masks are
	 * cleared and the CPUs each child held get this group's ids.
	 */
	list_for_each_entry(child, &rdtgrp->mon.crdtgrp_list,
			    mon.crdtgrp_list) {
		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &child->cpu_mask);
		update_closid_rmid(tmpmask, rdtgrp);
		cpumask_clear(&child->cpu_mask);
	}

	return 0;
}

/*
 * Apply @newmask as the CPU mask of monitor group @rdtgrp.
 *
 * Every CPU in @newmask must belong to the parent control group. CPUs
 * dropped go back to the parent; CPUs added are removed from sibling
 * monitor groups. @tmpmask is a caller-provided scratch mask.
 *
 * Returns 0 on success, -EINVAL if @newmask has CPUs outside the parent.
 */
int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
		   cpumask_var_t tmpmask)
{
	struct rdtgroup *parent = rdtgrp->mon.parent;
	struct rdtgroup *sibling;

	/* Reject CPUs that the parent ctrl group does not own. */
	cpumask_andnot(tmpmask, newmask, &parent->cpu_mask);
	if (cpumask_weight(tmpmask)) {
		rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n");
		return -EINVAL;
	}

	/* CPUs leaving this group are handed to the parent. */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (cpumask_weight(tmpmask)) {
		cpumask_or(&parent->cpu_mask, &parent->cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, parent);
	}

	/*
	 * CPUs joining this group are removed from the other monitor
	 * groups of the same parent, then the per-cpu rmid is updated.
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (cpumask_weight(tmpmask)) {
		list_for_each_entry(sibling, &parent->mon.crdtgrp_list,
				    mon.crdtgrp_list) {
			if (sibling != rdtgrp)
				cpumask_andnot(&sibling->cpu_mask,
					       &sibling->cpu_mask, tmpmask);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Record the new mask on this group. */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	return 0;
}

/*
 * Write handler for the "cpus" and "cpus_list" files: parse the new mask,
 * check that it only names online CPUs and hand it to the ctrl/mon
 * specific writer.
 *
 * Returns @nbytes on success or a negative errno.
 */
static ssize_t resctrl_group_cpus_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	cpumask_var_t tmpmask, newmask, tmpmask1;
	struct rdtgroup *rdtgrp;
	int ret;

	if (!buf)
		return -EINVAL;

	/* Three scratch masks; unwind in reverse order on failure. */
	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;
	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto free_tmpmask;
	}
	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto free_newmask;
	}

	rdtgrp = resctrl_group_kn_lock_live(of->kn);
	rdt_last_cmd_clear();
	if (!rdtgrp) {
		ret = -ENOENT;
		rdt_last_cmd_puts("directory was removed\n");
		goto unlock;
	}

	ret = is_cpu_list(of) ? cpulist_parse(buf, newmask) :
				cpumask_parse(buf, newmask);
	if (ret) {
		rdt_last_cmd_puts("bad cpu list/mask\n");
		goto unlock;
	}

	/* Offline CPUs may not be assigned. */
	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
	if (cpumask_weight(tmpmask)) {
		ret = -EINVAL;
		rdt_last_cmd_puts("can only assign online cpus\n");
		goto unlock;
	}

	switch (rdtgrp->type) {
	case RDTCTRL_GROUP:
		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
		break;
	case RDTMON_GROUP:
		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock:
	resctrl_group_kn_unlock(of->kn);
	free_cpumask_var(tmpmask1);
free_newmask:
	free_cpumask_var(newmask);
free_tmpmask:
	free_cpumask_var(tmpmask);

	if (ret)
		return ret;
	return nbytes;
}


/*
 * Check whether the caller may move @task: allowed when the caller's euid
 * is root or equals the task's uid or suid.
 *
 * Returns 0 if permitted, -EPERM otherwise.
 */
static int resctrl_group_task_write_permission(struct task_struct *task,
					  struct kernfs_open_file *of)
{
	const struct cred *tcred = get_task_cred(task);
	const struct cred *cred = current_cred();
	int ret;

	/*
	 * Even if we're attaching all tasks in the thread group, we only
	 * need to check permissions on one of them.
	 */
	if (uid_eq(cred->euid, GLOBAL_ROOT_UID) ||
	    uid_eq(cred->euid, tcred->uid) ||
	    uid_eq(cred->euid, tcred->suid)) {
		ret = 0;
	} else {
		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
		ret = -EPERM;
	}

	/* Drop the reference obtained by get_task_cred(). */
	put_cred(tcred);
	return ret;
}

/*
 * Look up the task for @pid (0 means the calling task), check permission
 * and move it to @rdtgrp.
 *
 * Returns 0 on success, -ESRCH if no such task exists, or the error from
 * the permission check / move.
 */
static int resctrl_group_move_task(pid_t pid, struct rdtgroup *rdtgrp,
			      struct kernfs_open_file *of)
{
	struct task_struct *tsk;
	int ret;

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		rdt_last_cmd_printf("No task %d\n", pid);
		return -ESRCH;
	}

	/* Pin the task before leaving the RCU read section. */
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = resctrl_group_task_write_permission(tsk, of);
	if (ret == 0)
		ret = __resctrl_group_move_task(tsk, rdtgrp);

	put_task_struct(tsk);
	return ret;
}

/*
 * Status text of the last resctrl command, exposed through the
 * "last_cmd_status" file. The seq_buf is bound to the backing buffer in
 * mpam_resctrl_init().
 */
static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];

/* Reset the last-command status text; caller holds resctrl_group_mutex. */
void rdt_last_cmd_clear(void)
{
	lockdep_assert_held(&resctrl_group_mutex);

	seq_buf_clear(&last_cmd_status);
}

/* Append @s to the last-command status text; caller holds resctrl_group_mutex. */
void rdt_last_cmd_puts(const char *s)
{
	lockdep_assert_held(&resctrl_group_mutex);

	seq_buf_puts(&last_cmd_status, s);
}

/*
 * Append a printf-style formatted message to the last-command status
 * text; caller holds resctrl_group_mutex.
 */
void rdt_last_cmd_printf(const char *fmt, ...)
{
	va_list args;

	lockdep_assert_held(&resctrl_group_mutex);

	va_start(args, fmt);
	seq_buf_vprintf(&last_cmd_status, fmt, args);
	va_end(args);
}

/*
 * Show the status text of the last command, or "ok" when nothing has
 * been recorded.
 */
static int resctrl_last_cmd_status_show(struct kernfs_open_file *of,
				    struct seq_file *seq, void *v)
{
	int used;

	mutex_lock(&resctrl_group_mutex);

	used = seq_buf_used(&last_cmd_status);
	if (!used)
		seq_puts(seq, "ok\n");
	else
		seq_printf(seq, "%.*s", used, last_cmd_status_buf);

	mutex_unlock(&resctrl_group_mutex);

	return 0;
}

/*
 * Show the number of closids, i.e. get_nr_closid() divided by the factor
 * that hw_alloc_times_validate() reports.
 */
static int resctrl_num_closids_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	u32 divisor;
	u32 flag;

	/* Fills in both arguments; only the divisor is used here. */
	hw_alloc_times_validate(divisor, flag);

	seq_printf(seq, "%u\n", get_nr_closid() / divisor);

	return 0;
}

/* Show the default control value of the resource's SCHEMA_COMM feature in hex. */
static int resctrl_cbm_mask_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_resource *res = of->kn->parent->priv;
	struct raw_resctrl_resource *raw = res->res;

	seq_printf(seq, "%x\n",
		   raw->ctrl_features[SCHEMA_COMM].default_ctrl);

	return 0;
}

/* Show the resource's cache.min_cbm_bits value in decimal. */
static int resctrl_min_cbm_bits_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_resource *res = of->kn->parent->priv;

	seq_printf(seq, "%u\n", res->cache.min_cbm_bits);

	return 0;
}

/* Show the resource's cache.shareable_bits value in hex. */
static int resctrl_shareable_bits_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_resource *res = of->kn->parent->priv;

	seq_printf(seq, "%x\n", res->cache.shareable_bits);

	return 0;
}

/*
 * List the enabled extended control features of a resource, one per line,
 * as "<name>@<max>", where <max> is max_wd - 1 printed in the feature's
 * own base (10 or 16). Features with any other base are not printed.
 */
static int resctrl_features_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_resource *res = of->kn->parent->priv;
	struct raw_resctrl_resource *raw = res->res;
	enum resctrl_ctrl_type type;

	for_each_extend_ctrl_type(type) {
		if (!raw->ctrl_features[type].enabled)
			continue;

		/*
		 * The range of a ctrl feature is defined with an integer;
		 * give user space the maximum upper bound.
		 */
		if (raw->ctrl_features[type].base == 10)
			seq_printf(seq, "%s@%u\n",
				raw->ctrl_features[type].name,
				raw->ctrl_features[type].max_wd - 1);
		else if (raw->ctrl_features[type].base == 16)
			seq_printf(seq, "%s@%x\n",
				raw->ctrl_features[type].name,
				raw->ctrl_features[type].max_wd - 1);
	}

	return 0;
}

/* Show the resource's mbw.min_bw value in decimal. */
static int resctrl_min_bandwidth_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_resource *res = of->kn->parent->priv;

	seq_printf(seq, "%u\n", res->mbw.min_bw);

	return 0;
}

/* Show the resource's mbw.bw_gran value in decimal. */
static int resctrl_bandwidth_gran_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_resource *res = of->kn->parent->priv;

	seq_printf(seq, "%u\n", res->mbw.bw_gran);

	return 0;
}

/*
 * Show the number of rmids, i.e. get_nr_rmids() divided by the factor
 * that hw_alloc_times_validate() reports.
 */
static int resctrl_num_rmids_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	u32 divisor;
	u32 flag;

	/* Fills in both arguments; only the divisor is used here. */
	hw_alloc_times_validate(divisor, flag);

	seq_printf(seq, "%u\n", get_nr_rmids() / divisor);

	return 0;
}

/*
 * Show the resource's monitor count (num_mon) divided by the factor that
 * hw_alloc_times_validate() reports.
 */
static int resctrl_num_monitors_show(struct kernfs_open_file *of,
				struct seq_file *seq, void *v)
{
	struct resctrl_resource *res = of->kn->parent->priv;
	struct raw_resctrl_resource *raw = res->res;
	u32 divisor;
	u32 flag;

	/* Fills in both arguments; only the divisor is used here. */
	hw_alloc_times_validate(divisor, flag);

	seq_printf(seq, "%u\n", raw->num_mon / divisor);

	return 0;
}


/*
 * Write handler for the "tasks" file: parse a non-negative pid from @buf
 * and move that task into the group behind @of.
 *
 * Returns @nbytes on success, -EINVAL on a malformed or negative pid,
 * -ENOENT if the group is gone, or the error from the move.
 */
static ssize_t resctrl_group_tasks_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	struct rdtgroup *grp;
	pid_t pid;
	int ret;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return -EINVAL;

	grp = resctrl_group_kn_lock_live(of->kn);
	rdt_last_cmd_clear();

	if (!grp)
		ret = -ENOENT;
	else
		ret = resctrl_group_move_task(pid, grp, of);

	resctrl_group_kn_unlock(of->kn);

	if (ret)
		return ret;
	return nbytes;
}

/*
 * Print the pid of every thread that belongs to group @r: monitor groups
 * are matched on rmid, control groups on closid.
 */
static void show_resctrl_tasks(struct rdtgroup *r, struct seq_file *s)
{
	struct task_struct *proc, *thread;

	rcu_read_lock();
	for_each_process_thread(proc, thread) {
		bool member = false;

		if (r->type == RDTMON_GROUP)
			member = thread->rmid == resctrl_navie_rmid(r->mon.rmid);
		else if (r->type == RDTCTRL_GROUP)
			member = thread->closid == resctrl_navie_closid(r->closid);

		if (member)
			seq_printf(s, "%d\n", thread->pid);
	}
	rcu_read_unlock();
}

/*
 * Read handler for the "tasks" file: list the pids of the group's tasks.
 * Returns -ENOENT if the group is no longer live.
 */
static int resctrl_group_tasks_show(struct kernfs_open_file *of,
			       struct seq_file *s, void *v)
{
	struct rdtgroup *grp;
	int ret = 0;

	grp = resctrl_group_kn_lock_live(of->kn);
	if (!grp)
		ret = -ENOENT;
	else
		show_resctrl_tasks(grp, s);
	resctrl_group_kn_unlock(of->kn);

	return ret;
}

/*
 * Read handler for the "rmid" file. Prints the group's rmid, or the range
 * "rmid-(rmid + 1)" when hw_alloc_times_validate() sets its flag.
 * Returns -ENOENT if the group is no longer live.
 */
static int resctrl_group_rmid_show(struct kernfs_open_file *of,
			       struct seq_file *s, void *v)
{
	struct rdtgroup *grp;
	u32 flag, times;
	int ret = 0;

	hw_alloc_times_validate(times, flag);

	grp = resctrl_group_kn_lock_live(of->kn);
	if (!grp) {
		ret = -ENOENT;
	} else if (flag) {
		seq_printf(s, "%u-%u\n", grp->mon.rmid,
			grp->mon.rmid + 1);
	} else {
		seq_printf(s, "%u\n", grp->mon.rmid);
	}
	resctrl_group_kn_unlock(of->kn);

	return ret;
}

/*
 * Write handler for the "rmid" file: switch a control group to a new,
 * user-chosen rmid.
 *
 * The new rmid is allocated, the group's reqpartid/rmid are switched, the
 * mondata directories and group configuration are refreshed, and every
 * thread whose closid equals the group's intpartid is moved. If any step
 * after the switch fails, the "rollback" path restores the old values and
 * releases the new rmid.
 *
 * Returns @nbytes on success (also when the rmid is unchanged), -EINVAL
 * for malformed, zero or invalid rmids and for unsupported groups,
 * -ENOENT if the group is gone, or the error of the failing step.
 */
static ssize_t resctrl_group_rmid_write(struct kernfs_open_file *of,
		char *buf, size_t nbytes, loff_t off)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;
	int partid;
	int pmg;
	int rmid;
	int old_rmid;
	int old_reqpartid;
	struct task_struct *p, *t;

	if (kstrtoint(strstrip(buf), 0, &rmid) || rmid < 0)
		return -EINVAL;

	rdtgrp = resctrl_group_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto unlock;
	}

	rdt_last_cmd_clear();

	/* rmid 0 can neither be requested nor given up */
	if (rmid == 0 || rdtgrp->mon.rmid == 0) {
		ret = -EINVAL;
		rdt_last_cmd_puts("default rmid 0 is always kept\n");
		goto unlock;
	}

	/* split the requested rmid into its partid and pmg parts */
	ret = rmid_to_partid_pmg(rmid, &partid, &pmg);
	if (ret < 0) {
		ret = -EINVAL;
		rdt_last_cmd_puts("invalid rmid\n");
		goto unlock;
	}

	/* nothing to do; ret is not negative here, see the check above */
	if (rmid == rdtgrp->mon.rmid)
		goto unlock;

	/* only control groups without child monitor groups are supported */
	if (rdtgrp->type != RDTCTRL_GROUP ||
			!list_empty(&rdtgrp->mon.crdtgrp_list)) {
		ret = -EINVAL;
		rdt_last_cmd_puts("unsupported operation\n");
		goto unlock;
	}

	ret = __rmid_alloc(partid, pmg);
	if (ret < 0) {
		rdt_last_cmd_puts("set rmid failed\n");
		goto unlock;
	}

	/* remember the current values for the rollback path */
	old_rmid = rdtgrp->mon.rmid;
	old_reqpartid = rdtgrp->closid.reqpartid;

	/*
	 * we use intpartid as group control, use reqpartid for config
	 * synchronization and monitor, only update the reqpartid
	 */
	rdtgrp->closid.reqpartid = partid;
	rdtgrp->mon.rmid = rmid;

	/* update rmid for mondata */
	ret = resctrl_mkdir_mondata_all_subdir(rdtgrp->mon.mon_data_kn, rdtgrp);
	if (ret) {
		rdt_last_cmd_puts("update rmid for mondata failed\n");
		goto rollback;
	}

	/* resync groups configuration */
	rdtgrp->resync = 1;
	ret = resctrl_update_groups_config(rdtgrp);
	if (ret) {
		rdt_last_cmd_puts("update groups config failed\n");
		goto rollback;
	}

	/* move every thread that currently runs with this group's intpartid */
	read_lock(&tasklist_lock);
	for_each_process_thread(p, t) {
		if (t->closid == rdtgrp->closid.intpartid) {
			ret = __resctrl_group_move_task(t, rdtgrp);
			if (ret) {
				read_unlock(&tasklist_lock);
				goto rollback;
			}
		}
	}
	read_unlock(&tasklist_lock);

	/* refresh the per-cpu defaults, then release the replaced rmid */
	update_closid_rmid(&rdtgrp->cpu_mask, rdtgrp);
	rmid_free(old_rmid);

unlock:
	resctrl_group_kn_unlock(of->kn);
	if (ret)
		return ret;

	return nbytes;

rollback:
	/* restore the previous ids; ret keeps the error that led here */
	rdtgrp->mon.rmid = old_rmid;
	rdtgrp->closid.reqpartid = old_reqpartid;

	/* the old rmid is valid, so mkdir mondata here won't fail */
	resctrl_mkdir_mondata_all_subdir(rdtgrp->mon.mon_data_kn, rdtgrp);

	rdtgrp->resync = 1;
	WARN_ON_ONCE(resctrl_update_groups_config(rdtgrp));

	/* move the threads back onto the restored ids */
	read_lock(&tasklist_lock);
	for_each_process_thread(p, t) {
		if (t->closid == rdtgrp->closid.intpartid)
			WARN_ON_ONCE(__resctrl_group_move_task(t, rdtgrp));
	}
	read_unlock(&tasklist_lock);

	/* release the rmid that was allocated for this request */
	rmid_free(rmid);
	resctrl_group_kn_unlock(of->kn);
	return ret;
}

/*
 * resctrl files provided by this file; registered through
 * register_resctrl_specific_files() in mpam_resctrl_init().
 * Each entry names the file, its mode, its show/write handlers and the
 * fflags that select where the file appears.
 */
static struct rftype res_specific_files[] = {
	/* read-only entries (mode 0444) */
	{
		.name		= "last_cmd_status",
		.mode		= 0444,
		.kf_ops		= &resctrl_group_kf_single_ops,
		.seq_show	= resctrl_last_cmd_status_show,
		.fflags		= RF_TOP_INFO,
	},
	{
		.name           = "num_closids",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_num_closids_show,
		.fflags         = RF_CTRL_INFO,
	},
	{
		.name           = "cbm_mask",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_cbm_mask_show,
		.fflags         = RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name           = "min_cbm_bits",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_min_cbm_bits_show,
		.fflags         = RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name           = "shareable_bits",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_shareable_bits_show,
		.fflags         = RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name           = "features",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_features_show,
		.fflags         = RF_CTRL_INFO,
	},
	{
		.name           = "min_bandwidth",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_min_bandwidth_show,
		.fflags         = RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	{
		.name           = "bandwidth_gran",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_bandwidth_gran_show,
		.fflags         = RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	{
		.name           = "num_rmids",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_num_rmids_show,
		.fflags         = RF_MON_INFO,
	},
	{
		.name           = "num_monitors",
		.mode           = 0444,
		.kf_ops         = &resctrl_group_kf_single_ops,
		.seq_show       = resctrl_num_monitors_show,
		.fflags         = RF_MON_INFO,
	},
	/* read-write entries (mode 0644) */
	{
		.name		= "cpus",
		.mode		= 0644,
		.kf_ops		= &resctrl_group_kf_single_ops,
		.write		= resctrl_group_cpus_write,
		.seq_show	= resctrl_group_cpus_show,
		.fflags		= RFTYPE_BASE,
	},
	{
		/* same handlers as "cpus"; the flag selects list formatting */
		.name		= "cpus_list",
		.mode		= 0644,
		.kf_ops		= &resctrl_group_kf_single_ops,
		.write		= resctrl_group_cpus_write,
		.seq_show	= resctrl_group_cpus_show,
		.flags		= RFTYPE_FLAGS_CPUS_LIST,
		.fflags		= RFTYPE_BASE,
	},
	{
		.name		= "tasks",
		.mode		= 0644,
		.kf_ops		= &resctrl_group_kf_single_ops,
		.write		= resctrl_group_tasks_write,
		.seq_show	= resctrl_group_tasks_show,
		.fflags		= RFTYPE_BASE,
	},
	{
		.name		= "rmid",
		.mode		= 0644,
		.kf_ops		= &resctrl_group_kf_single_ops,
		.write		= resctrl_group_rmid_write,
		.seq_show	= resctrl_group_rmid_show,
		.fflags		= RFTYPE_BASE,
	},
	{
		.name		= "schemata",
		.mode		= 0644,
		.kf_ops		= &resctrl_group_kf_single_ops,
		.write		= resctrl_group_schemata_write,
		.seq_show	= resctrl_group_schemata_show,
		.fflags		= RF_CTRL_BASE,
	}
};

/*
 * mpam_find_domain() - look up the domain with id @id in resource @r
 * @r:   resource whose (id-sorted) domain list is searched
 * @id:  domain id to find
 * @pos: optional; on a miss receives the list position at which the
 *       search stopped
 *
 * Returns the matching domain, NULL if there is none, or ERR_PTR(@id)
 * when @id is negative.
 */
struct rdt_domain *mpam_find_domain(struct resctrl_resource *r, int id,
		struct list_head **pos)
{
	struct list_head *cursor;
	struct rdt_domain *dom;

	if (id < 0)
		return ERR_PTR(id);

	list_for_each(cursor, &r->domains) {
		dom = list_entry(cursor, struct rdt_domain, list);

		/* Exact hit. */
		if (dom->id == id)
			return dom;

		/* The list is sorted: a larger id means @id is absent. */
		if (dom->id > id)
			break;
	}

	if (pos)
		*pos = cursor;

	return NULL;
}

/* Set from the "mpam" early parameter handled by mpam_setup(). */
enum mpam_enable_type __read_mostly mpam_enabled;

/*
 * Handle the "mpam" parameter: "mpam=acpi" selects MPAM_ENABLE_ACPI and
 * "mpam=of" selects MPAM_ENABLE_OF; any other value leaves mpam_enabled
 * untouched. Always returns 1.
 */
static int __init mpam_setup(char *str)
{
	if (strcmp(str, "=acpi") == 0) {
		mpam_enabled = MPAM_ENABLE_ACPI;
	} else if (strcmp(str, "=of") == 0) {
		mpam_enabled = MPAM_ENABLE_OF;
	}

	return 1;
}
__setup("mpam", mpam_setup);

/*
 * Initialise the MPAM resctrl glue: set up padding, register the resctrl
 * files defined in this file, bind the last-command status buffer and
 * finally initialise the resctrl group layer.
 *
 * Returns the result of resctrl_group_init().
 */
int mpam_resctrl_init(void)
{
	int ret;

	mpam_init_padding();

	register_resctrl_specific_files(res_specific_files,
					ARRAY_SIZE(res_specific_files));

	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
		     sizeof(last_cmd_status_buf));

	ret = resctrl_group_init();

	return ret;
}

/*
 * __intel_rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
 *
 * Following considerations are made so that this has minimal impact
 * on scheduler hot path:
 * - This will stay as no-op unless we are running on an Intel SKU
 *   which supports resource control or monitoring and we enable by
 *   mounting the resctrl file system.
 * - Caches the per cpu CLOSid/RMID values and does the MSR write only
 *   when a task with a different CLOSid/RMID is scheduled in.
 * - We allocate RMIDs/CLOSids globally in order to keep this as
 *   simple as possible.
 * Must be called with preemption disabled.
 */
void __mpam_sched_in(void)
{
	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
	u64 partid_d, partid_i;
	u64 rmid = state->default_rmid;
	u64 closid = state->default_closid;
	u64 reqpartid = 0;
	u64 pmg = 0;

	/*
	 * If this task has a closid/rmid assigned, use it.
	 * Else use the closid/rmid assigned to this cpu.
	 */
	if (static_branch_likely(&resctrl_alloc_enable_key)) {
		if (current->closid)
			closid = current->closid;
	}

	if (static_branch_likely(&resctrl_mon_enable_key)) {
		if (current->rmid)
			rmid = current->rmid;
	}

	if (closid != state->cur_closid || rmid != state->cur_rmid) {
		u64 reg;

		resctrl_navie_rmid_partid_pmg(rmid, (int *)&reqpartid, (int *)&pmg);

		if (resctrl_cdp_enabled) {
			resctrl_cdp_mpamid_map_val(reqpartid, CDP_DATA, partid_d);
			resctrl_cdp_mpamid_map_val(reqpartid, CDP_CODE, partid_i);

			/* set in EL0 */
			reg = mpam_read_sysreg_s(SYS_MPAM0_EL1, "SYS_MPAM0_EL1");
			reg = PARTID_D_SET(reg, partid_d);
			reg = PARTID_I_SET(reg, partid_i);
			reg = PMG_SET(reg, pmg);
			mpam_write_sysreg_s(reg, SYS_MPAM0_EL1, "SYS_MPAM0_EL1");

			/* set in EL1 */
			reg = mpam_read_sysreg_s(SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
			reg = PARTID_D_SET(reg, partid_d);
			reg = PARTID_I_SET(reg, partid_i);
			reg = PMG_SET(reg, pmg);
			mpam_write_sysreg_s(reg, SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
		} else {
			/* set in EL0 */
			reg = mpam_read_sysreg_s(SYS_MPAM0_EL1, "SYS_MPAM0_EL1");
			reg = PARTID_SET(reg, reqpartid);
			reg = PMG_SET(reg, pmg);
			mpam_write_sysreg_s(reg, SYS_MPAM0_EL1, "SYS_MPAM0_EL1");

			/* set in EL1 */
			reg = mpam_read_sysreg_s(SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
			reg = PARTID_SET(reg, reqpartid);
			reg = PMG_SET(reg, pmg);
			mpam_write_sysreg_s(reg, SYS_MPAM1_EL1, "SYS_MPAM1_EL1");
		}

		state->cur_rmid = rmid;
		state->cur_closid = closid;
	}
}

/*
 * Translate one resctrl control value into the matching field of
 * @mpam_cfg and mark the corresponding feature as valid.
 *
 * @res:         resource the value belongs to (its class supplies the
 *               bandwidth scaling widths)
 * @resctrl_cfg: control value as stored by resctrl
 * @evt:         which control the value is for; unknown events are ignored
 * @mpam_cfg:    configuration to update
 */
static void
mpam_update_from_resctrl_cfg(struct mpam_resctrl_res *res,
			u32 resctrl_cfg, enum rdt_event_id evt,
			struct mpam_config *mpam_cfg)
{
	u64 span;

	switch (evt) {
	case QOS_MBA_PBM_EVENT_ID:
		/* .. the number of bits we can set */
		span = res->class->mbw_pbm_bits;
		mpam_cfg->mbw_pbm = (resctrl_cfg * span) / MAX_MBA_BW;
		mpam_set_feature(mpam_feat_mbw_part, &mpam_cfg->valid);
		break;
	case QOS_MBA_MAX_EVENT_ID:
		/* scale to the hardware fraction and clamp to its maximum */
		span = MBW_MAX_BWA_FRACT(res->class->bwa_wd);
		mpam_cfg->mbw_max = (resctrl_cfg * span) / (MAX_MBA_BW - 1);
		if (mpam_cfg->mbw_max > span)
			mpam_cfg->mbw_max = span;
		mpam_set_feature(mpam_feat_mbw_max, &mpam_cfg->valid);
		break;
	case QOS_MBA_MIN_EVENT_ID:
		/* same scaling and clamping as for the maximum */
		span = MBW_MAX_BWA_FRACT(res->class->bwa_wd);
		mpam_cfg->mbw_min = (resctrl_cfg * span) / (MAX_MBA_BW - 1);
		if (mpam_cfg->mbw_min > span)
			mpam_cfg->mbw_min = span;
		mpam_set_feature(mpam_feat_mbw_min, &mpam_cfg->valid);
		break;
	case QOS_MBA_HDL_EVENT_ID:
		mpam_cfg->hdl = resctrl_cfg;
		mpam_set_feature(mpam_feat_part_hdl, &mpam_cfg->valid);
		break;
	case QOS_CAT_CPBM_EVENT_ID:
		mpam_cfg->cpbm = resctrl_cfg;
		mpam_set_feature(mpam_feat_cpor_part, &mpam_cfg->valid);
		break;
	case QOS_CAT_CMAX_EVENT_ID:
		mpam_cfg->cmax = resctrl_cfg;
		mpam_set_feature(mpam_feat_ccap_part, &mpam_cfg->valid);
		break;
	case QOS_MBA_INTPRI_EVENT_ID:
	case QOS_CAT_INTPRI_EVENT_ID:
		/* both priority events land in the same field */
		mpam_cfg->intpri = resctrl_cfg;
		mpam_set_feature(mpam_feat_intpri_part, &mpam_cfg->valid);
		break;
	default:
		break;
	}
}

/*
 * Rebuild the component configuration of @closid's reqpartid in domain @d
 * from the resctrl control values stored under its intpartid.
 *
 * Copying all ctrl types at once looks more efficient, as the devices'
 * state then only needs to be refreshed once through
 * mpam_component_config; each feature is checked again when the
 * configuration is applied.
 *
 * Caller must hold resctrl_group_mutex. Nothing is done when either
 * partid is out of range.
 */
static void
mpam_resctrl_update_component_cfg(struct resctrl_resource *r,
		struct rdt_domain *d, struct sd_closid *closid)
{
	struct raw_resctrl_resource *raw = r->res;
	struct mpam_resctrl_res *res;
	struct mpam_resctrl_dom *dom;
	struct mpam_config *cfg;
	enum resctrl_ctrl_type type;
	u32 intpartid = closid->intpartid;
	u32 reqpartid = closid->reqpartid;

	lockdep_assert_held(&resctrl_group_mutex);

	/* Out of range */
	if (intpartid >= mpam_sysprops_num_partid() ||
	    reqpartid >= mpam_sysprops_num_partid())
		return;

	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
	dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom);

	/*
	 * reqpartid is the index of each rdtgroup's mpam_cfg, so the
	 * master's settings are duplicated into cfg[reqpartid] only;
	 * cfg[intpartid] needn't receive a copy.
	 */
	cfg = &dom->comp->cfg[reqpartid];
	if (WARN_ON_ONCE(!cfg))
		return;

	/* Start from a clean slate; each enabled feature re-marks itself. */
	cfg->valid = 0;

	for_each_ctrl_type(type) {
		if (rr_feature_enabled_dummy_never_used: 0, raw->ctrl_features[type].enabled) {
			mpam_update_from_resctrl_cfg(res,
				d->ctrl_val[type][intpartid],
				raw->ctrl_features[type].evt, cfg);
		}
	}
}

/*
 * Reset every partid of a domain to the default control value of each
 * ctrl type, both in the component configuration and in the resctrl
 * ctrl_val table.
 */
static void mpam_reset_cfg(struct mpam_resctrl_res *res,
		struct mpam_resctrl_dom *dom, struct rdt_domain *d)

{
	struct raw_resctrl_resource *raw = res->resctrl_res.res;
	enum resctrl_ctrl_type type;
	int partid;

	for (partid = 0; partid != mpam_sysprops_num_partid(); partid++) {
		for_each_ctrl_type(type) {
			/* component config first, then the resctrl copy */
			mpam_update_from_resctrl_cfg(res,
				raw->ctrl_features[type].default_ctrl,
				raw->ctrl_features[type].evt,
				&dom->comp->cfg[partid]);
			d->ctrl_val[type][partid] =
				raw->ctrl_features[type].default_ctrl;
		}
	}
}

/*
 * Reset the configuration of every domain of each allocation-capable
 * resource to its defaults, reset the MPAM devices and turn CDP off.
 */
void resctrl_resource_reset(void)
{
	struct mpam_resctrl_res *res;
	struct rdt_domain *d;

	for_each_supported_resctrl_exports(res) {
		if (res->resctrl_res.alloc_capable) {
			list_for_each_entry(d, &res->resctrl_res.domains, list) {
				mpam_reset_cfg(res,
					container_of(d, struct mpam_resctrl_dom,
						     resctrl_dom),
					d);
			}
		}
	}

	mpam_reset_devices();

	/*
	 * reset CDP configuration used in recreating schema list nodes.
	 */
	resctrl_cdp_enabled = false;
}