// SPDX-License-Identifier: GPL-2.0+ /* * Common code for ARM v8 MPAM * * Copyright (C) 2020-2021 Huawei Technologies Co., Ltd * * Author: Wang Shaobo * * Code was partially borrowed from http://www.linux-arm.org/ * git?p=linux-jm.git;a=shortlog;h=refs/heads/mpam/snapshot/may. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * More information about MPAM be found in the Arm Architecture Reference * Manual. * * https://static.docs.arm.com/ddi0598/a/DDI0598_MPAM_supp_armv8a.pdf */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include "mpam_device.h" #include "mpam_internal.h" /* * The classes we've picked to map to resctrl resources. * Class pointer may be NULL. */ struct mpam_resctrl_res mpam_resctrl_exports[RDT_NUM_RESOURCES]; struct mpam_resctrl_res mpam_resctrl_events[RESCTRL_NUM_EVENT_IDS]; /* Like resctrl_get_domain_from_cpu(), but for offline CPUs */ static struct mpam_resctrl_dom * mpam_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) { struct rdt_domain *d; struct mpam_resctrl_dom *dom; list_for_each_entry(d, &res->resctrl_res.domains, list) { dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); if (cpumask_test_cpu(cpu, &dom->comp->fw_affinity)) return dom; } return NULL; } static int mpam_resctrl_setup_domain(unsigned int cpu, struct mpam_resctrl_res *res) { struct rdt_domain *d; struct mpam_resctrl_dom *dom; struct mpam_class *class = res->class; struct mpam_component *comp_iter, *comp; u32 num_partid; u32 **ctrlval_ptr; enum resctrl_ctrl_type type; struct list_head *tmp; num_partid = mpam_sysprops_num_partid(); comp = NULL; list_for_each_entry(comp_iter, &class->components, class_list) { if (cpumask_test_cpu(cpu, &comp_iter->fw_affinity)) { comp = comp_iter; break; } } /* cpu with unknown exported component? */ if (WARN_ON_ONCE(!comp)) return 0; dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu)); if (!dom) return -ENOMEM; dom->comp = comp; INIT_LIST_HEAD(&dom->resctrl_dom.list); dom->resctrl_dom.id = comp->comp_id; cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask); for_each_ctrl_type(type) { ctrlval_ptr = &dom->resctrl_dom.ctrl_val[type]; *ctrlval_ptr = kmalloc_array(num_partid, sizeof(**ctrlval_ptr), GFP_KERNEL); if (!*ctrlval_ptr) { kfree(dom); return -ENOMEM; } } tmp = &res->resctrl_res.domains; /* insert domains in id ascending order */ list_for_each_entry(d, &res->resctrl_res.domains, list) { /* find the last domain with id greater than this domain */ if (dom->resctrl_dom.id > d->id) tmp = &d->list; if (dom->resctrl_dom.id < d->id) break; } list_add(&dom->resctrl_dom.list, tmp); res->resctrl_res.dom_num++; return 0; } int mpam_resctrl_cpu_online(unsigned int cpu) { int ret; struct mpam_resctrl_dom *dom; struct mpam_resctrl_res *res; for_each_supported_resctrl_exports(res) { dom = mpam_get_domain_from_cpu(cpu, res); if (dom) { cpumask_set_cpu(cpu, &dom->resctrl_dom.cpu_mask); } else { ret = mpam_resctrl_setup_domain(cpu, res); if (ret) return ret; } } return mpam_resctrl_set_default_cpu(cpu); } static inline struct rdt_domain * resctrl_get_domain_from_cpu(int cpu, struct resctrl_resource *r) { struct rdt_domain *d; list_for_each_entry(d, &r->domains, list) { /* Find the domain that contains this CPU */ if (cpumask_test_cpu(cpu, &d->cpu_mask)) return d; } return NULL; } int mpam_resctrl_cpu_offline(unsigned int cpu) { struct rdt_domain *d; struct mpam_resctrl_res *res; struct mpam_resctrl_dom *dom; for_each_supported_resctrl_exports(res) { d = resctrl_get_domain_from_cpu(cpu, &res->resctrl_res); /* cpu with unknown exported component? */ if (WARN_ON_ONCE(!d)) continue; cpumask_clear_cpu(cpu, &d->cpu_mask); if (!cpumask_empty(&d->cpu_mask)) continue; list_del(&d->list); dom = container_of(d, struct mpam_resctrl_dom, resctrl_dom); kfree(dom); } mpam_resctrl_clear_default_cpu(cpu); return 0; } /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ static void mpam_resctrl_pick_caches(void) { struct mpam_class *class; struct mpam_resctrl_res *res; mpam_class_list_lock_held(); list_for_each_entry(class, &mpam_classes, classes_list) { if (class->type != MPAM_CLASS_CACHE) continue; if (class->level != 2 && class->level != 3) continue; if (!mpam_has_feature(mpam_feat_cpor_part, class->features) && !mpam_has_feature(mpam_feat_msmon_csu, class->features)) continue; if (!mpam_has_feature(mpam_feat_msmon_csu, class->features) && mpam_sysprops_num_partid() <= 1) continue; if (class->cpbm_wd > RESCTRL_MAX_CBM) continue; if (class->level == 2) { res = &mpam_resctrl_exports[RDT_RESOURCE_L2]; res->resctrl_res.name = "L2"; } else { res = &mpam_resctrl_exports[RDT_RESOURCE_L3]; res->resctrl_res.name = "L3"; } res->class = class; } } /* Find what we can export as MBA */ static void mpam_resctrl_pick_mba(void) { u8 resctrl_llc; struct mpam_class *class; struct mpam_class *candidate = NULL; mpam_class_list_lock_held(); /* At least two partitions ... */ if (mpam_sysprops_num_partid() <= 1) return; if (mpam_resctrl_exports[RDT_RESOURCE_L3].class) resctrl_llc = 3; else if (mpam_resctrl_exports[RDT_RESOURCE_L2].class) resctrl_llc = 2; else resctrl_llc = 0; list_for_each_entry(class, &mpam_classes, classes_list) { if (class->type == MPAM_CLASS_UNKNOWN) continue; if (class->level < resctrl_llc) continue; /* * Once we support MBM counters, we should require the MBA * class to be at the same point in the hierarchy. Practically, * this means the MBA class must support MBWU. Until then * having something is better than nothing, but this may cause * the MBA resource to disappear over a kernel update on a * system that could support both, but not at the same time. */ /* * There are two ways we can generate delays for MBA, either * with the mbw portion bitmap, or the mbw max control. */ if (!mpam_has_feature(mpam_feat_mbw_part, class->features) && !mpam_has_feature(mpam_feat_mbw_max, class->features)) { continue; } /* pick the class 'closest' to resctrl_llc */ if (!candidate || (class->level < candidate->level)) candidate = class; } if (candidate) mpam_resctrl_exports[RDT_RESOURCE_MC].class = candidate; } static void mpam_resctrl_pick_event_l3_occup(void) { /* * as the name suggests, resctrl can only use this if your cache is * called 'l3'. */ struct mpam_resctrl_res *res = &mpam_resctrl_exports[RDT_RESOURCE_L3]; if (!res->class) return; if (!mpam_has_feature(mpam_feat_msmon_csu, res->class->features)) return; mpam_resctrl_events[QOS_L3_OCCUP_EVENT_ID] = *res; rdt_mon_capable = true; res->resctrl_res.mon_capable = true; res->resctrl_res.mon_capable = true; } static void mpam_resctrl_pick_event_mbm_total(void) { u64 num_counters; struct mpam_resctrl_res *res; /* We prefer to measure mbm_total on whatever we used as MBA... */ res = &mpam_resctrl_exports[RDT_RESOURCE_MC]; if (!res->class) { /* ... but if there isn't one, the L3 cache works */ res = &mpam_resctrl_exports[RDT_RESOURCE_L3]; if (!res->class) return; } /* * to measure bandwidth in a resctrl like way, we need to leave a * counter running all the time. As these are PMU-like, it is really * unlikely we have enough... To be useful, we'd need at least one per * closid. */ num_counters = mpam_sysprops_num_partid(); if (mpam_has_feature(mpam_feat_msmon_mbwu, res->class->features)) { if (res->class->num_mbwu_mon >= num_counters) { /* * We don't support this use of monitors, let the * world know this platform could make use of them * if we did! */ } } } static void mpam_resctrl_pick_event_mbm_local(void) { struct mpam_resctrl_res *res; res = &mpam_resctrl_exports[RDT_RESOURCE_MC]; if (!res->class) return; if (mpam_has_feature(mpam_feat_msmon_mbwu, res->class->features)) { res->resctrl_res.mon_capable = true; rdt_mon_capable = true; mpam_resctrl_events[QOS_L3_MBM_LOCAL_EVENT_ID] = *res; } } static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) { struct mpam_class *class = res->class; struct resctrl_resource *r = &res->resctrl_res; struct raw_resctrl_resource *rr = NULL; if (class == mpam_resctrl_exports[RDT_RESOURCE_SMMU].class) { return 0; } else if (class == mpam_resctrl_exports[RDT_RESOURCE_MC].class) { r->rid = RDT_RESOURCE_MC; r->name = "MB"; r->fflags = RFTYPE_RES_MC; r->mbw.delay_linear = true; rr = mpam_get_raw_resctrl_resource(RDT_RESOURCE_MC); rr->num_mon = class->num_mbwu_mon; r->res = rr; if (mpam_has_feature(mpam_feat_mbw_part, class->features)) { /* * The maximum throttling is the number of bits we can * unset in the bitmap. We never clear all of them, * so the minimum is one bit, as a percentage. */ r->mbw.min_bw = MAX_MBA_BW / class->mbw_pbm_bits; rr->ctrl_features[SCHEMA_PBM].max_wd = MAX_MBA_BW + 1; rr->ctrl_features[SCHEMA_PBM].capable = true; } if (mpam_has_feature(mpam_feat_mbw_max, class->features)) { /* * The maximum throttling is the number of fractions we * can represent with the implemented bits. We never * set 0. The minimum is the LSB, as a percentage. */ r->mbw.min_bw = MAX_MBA_BW / ((1ULL << class->bwa_wd) - 1); /* the largest mbw_max is 100 */ rr->ctrl_features[SCHEMA_MAX].default_ctrl = MAX_MBA_BW; rr->ctrl_features[SCHEMA_MAX].max_wd = MAX_MBA_BW + 1; rr->ctrl_features[SCHEMA_MAX].capable = true; /* default set max stride MAX as COMMON ctrl feature */ rr->ctrl_features[SCHEMA_COMM].default_ctrl = rr->ctrl_features[SCHEMA_MAX].default_ctrl; rr->ctrl_features[SCHEMA_COMM].max_wd = rr->ctrl_features[SCHEMA_MAX].max_wd; rr->ctrl_features[SCHEMA_COMM].capable = rr->ctrl_features[SCHEMA_MAX].capable; } if (mpam_has_feature(mpam_feat_mbw_min, class->features)) { rr->ctrl_features[SCHEMA_MIN].max_wd = MAX_MBA_BW + 1; rr->ctrl_features[SCHEMA_MIN].capable = true; } /* * Export priority setting, which represents the max level of * control we can export. this default priority from hardware, * no clever here, no need to define additional default value. */ if (mpam_has_feature(mpam_feat_intpri_part, class->features)) { rr->ctrl_features[SCHEMA_PRI].max_wd = 1 << class->intpri_wd; rr->ctrl_features[SCHEMA_PRI].default_ctrl = class->hwdef_intpri; rr->ctrl_features[SCHEMA_PRI].capable = true; } /* Just in case we have an excessive number of bits */ if (!r->mbw.min_bw) r->mbw.min_bw = 1; /* * james said because its linear with no offset, the granule is the same * as the smallest value. It is a little fuzzy here because a granularity * of 1 would appear too fine to make percentage conversions. */ r->mbw.bw_gran = GRAN_MBA_BW; /* We will only pick a class that can monitor and control */ r->alloc_capable = true; r->alloc_enabled = true; rdt_alloc_capable = true; r->mon_capable = true; r->mon_enabled = true; /* Export memory bandwidth hardlimit, default active hardlimit */ rr->ctrl_features[SCHEMA_HDL].default_ctrl = 1; rr->ctrl_features[SCHEMA_HDL].max_wd = 2; rr->ctrl_features[SCHEMA_HDL].capable = true; } else if (class == mpam_resctrl_exports[RDT_RESOURCE_L3].class) { r->rid = RDT_RESOURCE_L3; rr = mpam_get_raw_resctrl_resource(RDT_RESOURCE_L3); rr->num_mon = class->num_csu_mon; r->res = rr; r->fflags = RFTYPE_RES_CACHE; r->name = "L3"; if (mpam_has_feature(mpam_feat_cpor_part, class->features)) { r->cache.cbm_len = class->cpbm_wd; rr->ctrl_features[SCHEMA_PBM].default_ctrl = GENMASK(class->cpbm_wd - 1, 0); rr->ctrl_features[SCHEMA_PBM].max_wd = rr->ctrl_features[SCHEMA_PBM].default_ctrl + 1; rr->ctrl_features[SCHEMA_PBM].capable = true; /* * Which bits are shared with other ...things... * Unknown devices use partid-0 which uses all the bitmap * fields. Until we configured the SMMU and GIC not to do this * 'all the bits' is the correct answer here. */ r->cache.shareable_bits = rr->ctrl_features[SCHEMA_PBM].default_ctrl; r->cache.min_cbm_bits = 1; /* default set CPBM as COMMON ctrl feature */ rr->ctrl_features[SCHEMA_COMM].default_ctrl = rr->ctrl_features[SCHEMA_PBM].default_ctrl; rr->ctrl_features[SCHEMA_COMM].max_wd = rr->ctrl_features[SCHEMA_PBM].max_wd; rr->ctrl_features[SCHEMA_COMM].capable = rr->ctrl_features[SCHEMA_PBM].capable; } if (mpam_has_feature(mpam_feat_intpri_part, class->features)) { /* * Export internal priority setting, which represents the * max level of control we can export to resctrl. this default * priority is from hardware, no clever here. */ rr->ctrl_features[SCHEMA_PRI].max_wd = 1 << class->intpri_wd; rr->ctrl_features[SCHEMA_PRI].default_ctrl = class->hwdef_intpri; rr->ctrl_features[SCHEMA_PRI].capable = true; } if (mpam_has_feature(mpam_feat_ccap_part, class->features)) { rr->ctrl_features[SCHEMA_MAX].max_wd = mpam_sysprops_llc_size() + 1; rr->ctrl_features[SCHEMA_MAX].capable = true; } /* * Only this resource is allocable can it be picked from * mpam_resctrl_pick_caches(). So directly set following * fields to true. */ r->alloc_capable = true; r->alloc_enabled = true; rdt_alloc_capable = true; /* * While this is a CPU-interface feature of MPAM, we only tell * resctrl about it for caches, as that seems to be how x86 * works, and thus what resctrl expects. */ r->cdp_capable = true; r->mon_capable = true; r->mon_enabled = true; } else if (class == mpam_resctrl_exports[RDT_RESOURCE_L2].class) { r->rid = RDT_RESOURCE_L2; rr = mpam_get_raw_resctrl_resource(RDT_RESOURCE_L2); rr->num_mon = class->num_csu_mon; r->res = rr; r->fflags = RFTYPE_RES_CACHE; r->name = "L2"; if (mpam_has_feature(mpam_feat_cpor_part, class->features)) { r->cache.cbm_len = class->cpbm_wd; rr->ctrl_features[SCHEMA_PBM].default_ctrl = GENMASK(class->cpbm_wd - 1, 0); rr->ctrl_features[SCHEMA_PBM].max_wd = rr->ctrl_features[SCHEMA_PBM].default_ctrl + 1; rr->ctrl_features[SCHEMA_PBM].capable = true; /* * Which bits are shared with other ...things... * Unknown devices use partid-0 which uses all the bitmap * fields. Until we configured the SMMU and GIC not to do this * 'all the bits' is the correct answer here. */ r->cache.shareable_bits = rr->ctrl_features[SCHEMA_COMM].default_ctrl; /* default set max stride MAX as COMMON ctrl feature */ rr->ctrl_features[SCHEMA_COMM].default_ctrl = rr->ctrl_features[SCHEMA_PBM].default_ctrl; rr->ctrl_features[SCHEMA_COMM].max_wd = rr->ctrl_features[SCHEMA_PBM].max_wd; rr->ctrl_features[SCHEMA_COMM].capable = rr->ctrl_features[SCHEMA_PBM].capable; } if (mpam_has_feature(mpam_feat_ccap_part, class->features)) { rr->ctrl_features[SCHEMA_MAX].max_wd = ~0; rr->ctrl_features[SCHEMA_MAX].capable = true; } if (mpam_has_feature(mpam_feat_intpri_part, class->features)) { /* * Export internal priority setting, which represents the * max level of control we can export to resctrl. this default * priority is from hardware, no clever here. */ rr->ctrl_features[SCHEMA_PRI].max_wd = 1 << class->intpri_wd; rr->ctrl_features[SCHEMA_PRI].default_ctrl = class->hwdef_intpri; rr->ctrl_features[SCHEMA_PRI].capable = true; } /* * Only this resource is allocable can it be picked from * mpam_resctrl_pick_caches(). So directly set following * fields to true. */ r->alloc_capable = true; r->alloc_enabled = true; rdt_alloc_capable = true; /* * While this is a CPU-interface feature of MPAM, we only tell * resctrl about it for caches, as that seems to be how x86 * works, and thus what resctrl expects. */ r->cdp_capable = true; r->mon_capable = false; } if (rr && class) { rr->num_partid = class->num_partid; rr->num_intpartid = class->num_intpartid; rr->num_pmg = class->num_pmg; } return 0; } /* Called with the mpam classes lock held */ int mpam_resctrl_setup(void) { int rc; struct mpam_resctrl_res *res; enum resctrl_resource_level level = 0; for_each_resctrl_exports(res) { INIT_LIST_HEAD(&res->resctrl_res.domains); res->resctrl_res.rid = level; level++; } mpam_resctrl_pick_caches(); mpam_resctrl_pick_mba(); mpam_resctrl_pick_event_l3_occup(); mpam_resctrl_pick_event_mbm_total(); mpam_resctrl_pick_event_mbm_local(); for_each_supported_resctrl_exports(res) { rc = mpam_resctrl_resource_init(res); if (rc) return rc; } if (!rdt_alloc_capable && !rdt_mon_capable) return -EOPNOTSUPP; return 0; } struct resctrl_resource * mpam_resctrl_get_resource(enum resctrl_resource_level level) { if (level >= RDT_NUM_RESOURCES || !mpam_resctrl_exports[level].class) return NULL; return &mpam_resctrl_exports[level].resctrl_res; }