// SPDX-License-Identifier: GPL-2.0+
/*
 * Common code for Smart Grid Scheduling
 *
 * Copyright (C) 2023-2024 Huawei Technologies Co., Ltd
 *
 * Author: Wang Shaobo
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

/*
 * NOTE: the original header names were lost in extraction; the list
 * below is reconstructed from the symbols used in this file.
 */
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/sched/grid_qos.h>
#include "internal.h"
#include <../kernel/sched/sched.h>

static inline int qos_affinity_set(struct task_struct *p)
{
	int n;
	struct sched_grid_qos_affinity *affinity = &p->grid_qos->affinity;

	if (likely(affinity->prefer_cpus == p->select_cpus))
		return 0;

	/*
	 * We want the memory allocation to be as close to the CPU
	 * as possible, and adjust after getting memory bandwidth usage.
	 */
	for (n = 0; n < nr_node_ids; n++) {
		if (cpumask_intersects(cpumask_of_node(n), p->select_cpus))
			node_set(n, affinity->mem_preferred_node_mask);
		else
			node_clear(n, affinity->mem_preferred_node_mask);
	}

	affinity->prefer_cpus = p->select_cpus;

	return 0;
}

int sched_grid_qos_fork(struct task_struct *p, struct task_struct *orig)
{
	struct sched_grid_qos *qos;

	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
	if (!qos)
		return -ENOMEM;

	qos_power_init(&qos->power);
	qos_stat_init(&qos->stat);

	nodes_clear(qos->affinity.mem_preferred_node_mask);
	if (likely(orig->grid_qos))
		qos->affinity = orig->grid_qos->affinity;
	qos->affinity_set = qos_affinity_set;
	p->grid_qos = qos;

	return 0;
}

void sched_grid_qos_free(struct task_struct *p)
{
	kfree(p->grid_qos);
	p->grid_qos = NULL;
}

/* Dynamically select a more appropriate preferred interleave nid for the process. */
int sched_grid_preferred_interleave_nid(struct mempolicy *policy)
{
#ifndef CONFIG_NUMA
	return NUMA_NO_NODE;
#else
	nodemask_t nmask;
	unsigned int next;
	struct task_struct *me = current;
	nodemask_t *preferred_nmask = NULL;

	if (likely(me->grid_qos))
		preferred_nmask =
			&me->grid_qos->affinity.mem_preferred_node_mask;

	if (!preferred_nmask || !policy)
		return NUMA_NO_NODE;

	if (nodes_equal(policy->nodes, *preferred_nmask))
		return NUMA_NO_NODE;

	/*
	 * We perceive the actual consumption of memory bandwidth
	 * in each node and post a preferred interleave nid in
	 * more appropriate range.
	 */
	nodes_and(nmask, policy->nodes, *preferred_nmask);
	if (nodes_empty(nmask))
		return NUMA_NO_NODE;

	next = next_node_in(me->il_prev, nmask);
	if (next < MAX_NUMNODES)
		me->il_prev = next;

	return next;
#endif
}

/* Dynamically select a more appropriate preferred nid for the process. */
int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask)
{
	int nd = preferred_nid;
	nodemask_t nmask, ndmask;
	nodemask_t *preferred_nmask = NULL;

	if (likely(current->grid_qos))
		preferred_nmask =
			&current->grid_qos->affinity.mem_preferred_node_mask;

	if (!preferred_nmask)
		return preferred_nid;

	/*
	 * We perceive the actual consumption of memory bandwidth
	 * in each node and post a preferred nid in more appropriate
	 * range.
	 */
	nmask = *preferred_nmask;
	if (nodemask) {
		if (nodes_equal(*nodemask, nmask))
			return preferred_nid;

		nodes_and(nmask, nmask, *nodemask);
	}

	if (node_isset(preferred_nid, nmask))
		return preferred_nid;

	/*
	 * We prefer the NUMA node we are running on. If the nodemask
	 * does not restrict us, select the preferred nid from the
	 * preferred range, otherwise from the restricted range.
	 */
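	/*
	 * Worked example with hypothetical values: running on node 1
	 * with nmask = {0,2}, the intersection {1} & {0,2} below is
	 * empty, so we fall back to first_node(nmask) = 0.
	 */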
	init_nodemask_of_node(&ndmask, numa_node_id());
	nodes_and(ndmask, nmask, ndmask);
	if (!nodes_empty(ndmask))
		nd = first_node(ndmask);
	else if (!nodes_empty(nmask))
		nd = first_node(nmask);

	return nd;
}

static struct sched_grid_zone sg_zone;

int __init sched_grid_zone_init(void)
{
	int index;

	for (index = 0; index < SMART_GRID_ZONE_NR; index++)
		cpumask_clear(&sg_zone.cpus[index]);

	raw_spin_lock_init(&sg_zone.lock);
	INIT_LIST_HEAD(&sg_zone.af_list_head);
	return 0;
}

int sched_grid_zone_update(bool is_locked)
{
	struct list_head *pos;
	struct auto_affinity *af_pos;
	unsigned long flags;

	if (!is_locked)
		raw_spin_lock_irqsave(&sg_zone.lock, flags);

	cpumask_clear(&sg_zone.cpus[SMART_GRID_ZONE_HOT]);
	cpumask_clear(&sg_zone.cpus[SMART_GRID_ZONE_WARM]);

	list_for_each(pos, &sg_zone.af_list_head) {
		af_pos = list_entry(pos, struct auto_affinity, af_list);

		/*
		 * When smart_grid is not used we must account for every
		 * task group; when it is used we only account for the
		 * task groups that have it enabled.
		 */
		if (smart_grid_used() && af_pos->mode == 0)
			continue;

		cpumask_or(&sg_zone.cpus[SMART_GRID_ZONE_HOT],
			   &sg_zone.cpus[SMART_GRID_ZONE_HOT],
			   af_pos->ad.domains[af_pos->ad.curr_level]);
		/* Update warm zone CPUs to the max level first */
		cpumask_or(&sg_zone.cpus[SMART_GRID_ZONE_WARM],
			   &sg_zone.cpus[SMART_GRID_ZONE_WARM],
			   af_pos->ad.domains[af_pos->ad.dcount - 1]);
	}

	/* Then remove the hot zone CPUs from the warm zone */
	cpumask_andnot(&sg_zone.cpus[SMART_GRID_ZONE_WARM],
		       &sg_zone.cpus[SMART_GRID_ZONE_WARM],
		       &sg_zone.cpus[SMART_GRID_ZONE_HOT]);

	if (!is_locked)
		raw_spin_unlock_irqrestore(&sg_zone.lock, flags);

	cpufreq_smart_grid_start_sync();
	return 0;
}

int sched_grid_zone_add_af(struct auto_affinity *af)
{
	unsigned long flags;

	if (af == NULL)
		return -1;

	raw_spin_lock_irqsave(&sg_zone.lock, flags);
	list_add_tail(&af->af_list, &sg_zone.af_list_head);
	sched_grid_zone_update(true);
	raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
	return 0;
}

int sched_grid_zone_del_af(struct auto_affinity *af)
{
	unsigned long flags;

	if (af == NULL)
		return -1;

	raw_spin_lock_irqsave(&sg_zone.lock, flags);
	list_del(&af->af_list);
	sched_grid_zone_update(true);
	raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
	return 0;
}

struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone)
{
	if (zone >= SMART_GRID_ZONE_NR)
		return NULL;

	return &sg_zone.cpus[zone];
}

/*
 * The smart_grid strategy is disabled (=0) by default. For backward
 * compatibility with the previous version of this code, all tasks are
 * treated as having the highest QoS level (class_lvl = 0) while the
 * strategy is disabled. When the strategy is enabled, the task's
 * actual class_lvl is used instead.
 */
unsigned int sysctl_smart_grid_strategy_ctrl;

struct cpumask *sched_grid_prefer_cpus(struct task_struct *p)
{
	struct affinity_domain *ad;
	enum sg_zone_type current_zone;

	ad = &task_group(p)->auto_affinity->ad;
	/*
	 * When the smart_grid strategy is disabled, treat every task
	 * as if it were at the highest QoS level (class_lvl = 0).
	 */
	if (sysctl_smart_grid_strategy_ctrl == 0)
		return ad->domains[ad->curr_level];

	/* Only place the highest-level tasks into the hot zone */
	current_zone = p->grid_qos->stat.class_lvl ==
		       SCHED_GRID_QOS_TASK_LEVEL_HIGHEST ?
		       SMART_GRID_ZONE_HOT : SMART_GRID_ZONE_WARM;

	/* Keep the highest-level tasks in the current domain level itself */
	if (current_zone == SMART_GRID_ZONE_HOT)
		return ad->domains[ad->curr_level];

	return &sg_zone.cpus[current_zone];
}
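/*
 * Illustrative call flow (the actual call sites live outside this file):
 * a task group that enables auto affinity registers its auto_affinity
 * via sched_grid_zone_add_af(); sched_grid_zone_update() then rebuilds
 * the hot/warm zone cpumasks from every registered affinity domain; and
 * the scheduler consults sched_grid_prefer_cpus(p) when choosing where
 * to place task p.
 */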