// SPDX-License-Identifier: GPL-2.0
/* legacy-filescontrol.c - Cgroup controller for open file handles.
 *
 * Copyright 2014 Google Inc.
 * Author: Brian Makin <merimus@google.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
|
|
#include <linux/page_counter.h>
|
|
#include <linux/legacy-filescontrol.h>
|
|
#include <linux/filescontrol.h>
|
|
#include <linux/cgroup.h>
|
|
#include <linux/export.h>
|
|
#include <linux/printk.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/fdtable.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/module.h>
|
|
|
|
#define FILES_MAX D_COUNT_MAX
|
|
#define FILES_MAX_STR "max"
|
|
|
|
static bool no_acct;
|
|
struct cgroup_subsys files_cgrp_subsys __read_mostly;
|
|
|
|
module_param(no_acct, bool, 0444);
|
|
|
|
struct files_cgroup {
|
|
struct cgroup_subsys_state css;
|
|
struct page_counter open_handles;
|
|
};
|
|
|
|
static inline bool files_cgroup_enabled(void)
|
|
{
|
|
return cgroup_subsys_enabled(files_cgrp_subsys);
|
|
}
|
|
|
|
static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css)
|
|
{
|
|
return css ? container_of(css, struct files_cgroup, css) : NULL;
|
|
}
|
|
|
|
static inline struct page_counter *
|
|
css_res_open_handles(struct cgroup_subsys_state *css)
|
|
{
|
|
return &css_fcg(css)->open_handles;
|
|
}
|
|
|
|
static inline struct files_cgroup *
|
|
files_cgroup_from_files(struct files_struct *files)
|
|
{
|
|
return files->files_cgroup;
|
|
}
|
|
|
|
|
|
static struct cgroup_subsys_state *
|
|
files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
{
|
|
struct files_cgroup *parent_fcg;
|
|
struct files_cgroup *fcg;
|
|
|
|
parent_fcg = css_fcg(parent_css);
|
|
fcg = kzalloc(sizeof(*fcg), GFP_KERNEL);
|
|
if (!fcg)
|
|
goto out;
|
|
|
|
if (!parent_fcg) {
|
|
page_counter_init(&fcg->open_handles, NULL);
|
|
page_counter_set_max(&fcg->open_handles, FILES_MAX);
|
|
} else {
|
|
struct page_counter *p_counter = &parent_fcg->open_handles;
|
|
|
|
page_counter_init(&fcg->open_handles, p_counter);
|
|
page_counter_set_max(&fcg->open_handles, FILES_MAX);
|
|
}
|
|
return &fcg->css;
|
|
|
|
out:
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
/* Free the files_cgroup that embeds @css. */
static void files_cgroup_css_free(struct cgroup_subsys_state *css)
{
	struct files_cgroup *fcg = css_fcg(css);

	kfree(fcg);
}
|
|
/*
|
|
* If attaching this cgroup would overcommit the resource then deny
|
|
* the attach. If not, attach the file resource into new cgroup.
|
|
*/
|
|
static int files_cgroup_can_attach(struct cgroup_taskset *tset)
|
|
{
|
|
u64 num_files;
|
|
bool can_attach;
|
|
struct cgroup_subsys_state *to_css;
|
|
struct cgroup_subsys_state *from_css;
|
|
struct page_counter *from_res;
|
|
struct page_counter *to_res;
|
|
struct page_counter *fail_res;
|
|
struct files_struct *files;
|
|
struct task_struct *task = cgroup_taskset_first(tset, &to_css);
|
|
|
|
to_res = css_res_open_handles(to_css);
|
|
|
|
task_lock(task);
|
|
files = task->files;
|
|
if (!files || !files->files_cgroup || files == &init_files) {
|
|
task_unlock(task);
|
|
return 0;
|
|
}
|
|
|
|
from_css = &files_cgroup_from_files(files)->css;
|
|
from_res = css_res_open_handles(from_css);
|
|
|
|
spin_lock(&files->file_lock);
|
|
num_files = file_cg_count_fds(files);
|
|
page_counter_uncharge(from_res, num_files);
|
|
|
|
if (!page_counter_try_charge(to_res, num_files, &fail_res)) {
|
|
page_counter_charge(from_res, num_files);
|
|
pr_err("Open files limit overcommited\n");
|
|
can_attach = false;
|
|
} else {
|
|
css_put(from_css);
|
|
css_get(to_css);
|
|
task->files->files_cgroup = css_fcg(to_css);
|
|
can_attach = true;
|
|
}
|
|
spin_unlock(&files->file_lock);
|
|
task_unlock(task);
|
|
return can_attach ? 0 : -ENOSPC;
|
|
}
|
|
|
|
/*
 * Charge @n file descriptors to the files cgroup recorded in @files.
 *
 * Returns 0 on success or when nothing is charged (controller disabled,
 * accounting turned off via no_acct, or @files is init_files), and
 * -ENOMEM when the charge is refused by the counter.
 */
int files_cgroup_alloc_fd(struct files_struct *files, u64 n)
{
	struct page_counter *fail_res;
	struct files_cgroup *fcg;

	if (!files_cgroup_enabled())
		return 0;

	/*
	 * Kernel threads forked by kthreadd inherit the const files_struct
	 * 'init_files'. It is not wrapped, so it has no associated
	 * files_cgroup.
	 *
	 * Kernel threads always stay in the root cgroup and the root files
	 * cgroup has no limit, so skipping the charge does no harm; the only
	 * consequence is that files.usage of the root files cgroup is not
	 * accurate.
	 */
	if (no_acct || files == &init_files)
		return 0;

	fcg = files_cgroup_from_files(files);
	if (page_counter_try_charge(&fcg->open_handles, n, &fail_res))
		return 0;

	return -ENOMEM;
}
|
|
/*
 * Uncharge @n file descriptors from the files cgroup recorded in @files.
 *
 * Nothing happens when the controller is disabled, when accounting is
 * turned off via no_acct, or when @files is init_files.
 */
void files_cgroup_unalloc_fd(struct files_struct *files, u64 n)
{
	struct files_cgroup *fcg;

	if (!files_cgroup_enabled())
		return;

	/*
	 * init_files is never charged by files_cgroup_alloc_fd(), so there
	 * is nothing to uncharge for it either.
	 */
	if (no_acct || files == &init_files)
		return;

	fcg = files_cgroup_from_files(files);
	page_counter_uncharge(&fcg->open_handles, 1 * n);
}
|
|
void files_cgroup_put_fd(struct files_struct *files, unsigned int fd)
|
|
{
|
|
struct fdtable *fdt = files_fdtable(files);
|
|
|
|
if (!files_cgroup_enabled())
|
|
return;
|
|
|
|
if (test_bit(fd, fdt->open_fds))
|
|
return files_cgroup_unalloc_fd(files, 1);
|
|
}
|
|
|
|
int files_cgroup_dup_fds(struct files_struct *newf)
|
|
{
|
|
int err;
|
|
|
|
if (!files_cgroup_enabled())
|
|
return 0;
|
|
spin_lock(&newf->file_lock);
|
|
err = files_cgroup_alloc_fd(newf, file_cg_count_fds(newf));
|
|
spin_unlock(&newf->file_lock);
|
|
return err;
|
|
}
|
|
|
|
static u64 files_disabled_read(struct cgroup_subsys_state *css,
|
|
struct cftype *cft)
|
|
{
|
|
return no_acct;
|
|
}
|
|
|
|
/*
 * "no_acct" write handler. Any non-zero value turns accounting off;
 * writing 0 is rejected with -EINVAL, so this handler never turns
 * accounting back on.
 */
static int files_disabled_write(struct cgroup_subsys_state *css,
				struct cftype *cft, u64 val)
{
	if (val == 0)
		return -EINVAL;

	no_acct = true;
	return 0;
}
|
|
static int files_limit_read(struct seq_file *sf, void *v)
|
|
{
|
|
struct files_cgroup *fcg = css_fcg(seq_css(sf));
|
|
struct page_counter *counter = &fcg->open_handles;
|
|
u64 limit = counter->max;
|
|
|
|
if (limit >= FILES_MAX)
|
|
seq_printf(sf, "%s\n", FILES_MAX_STR);
|
|
else
|
|
seq_printf(sf, "%llu\n", limit);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * "limit" write handler.
 *
 * Accepts either FILES_MAX_STR or an unsigned number (any base understood
 * by kstrtoull() with base 0), surrounded by optional whitespace.
 *
 * Returns @nbytes on success, the kstrtoull() error for unparsable input,
 * or the error reported by page_counter_set_max() when the counter
 * refuses the new maximum.
 */
static ssize_t files_limit_write(struct kernfs_open_file *of,
				 char *buf, size_t nbytes, loff_t off)
{
	struct files_cgroup *fcg = css_fcg(of_css(of));
	u64 limit;
	int err;

	buf = strstrip(buf);
	if (!strcmp(buf, FILES_MAX_STR)) {
		limit = FILES_MAX;
	} else {
		err = kstrtoull(buf, 0, &limit);
		if (err)
			return err;

		/*
		 * Anything at or above FILES_MAX already reads back as
		 * "max"; clamp so an oversized value is never handed to
		 * the counter.
		 */
		if (limit > FILES_MAX)
			limit = FILES_MAX;
	}

	/*
	 * Limit updates don't need to be mutex'd, since it isn't
	 * critical that any racing fork()s follow the new limit.
	 *
	 * The result is propagated so that a rejected update is not
	 * reported to the writer as a success.
	 */
	err = page_counter_set_max(&fcg->open_handles, limit);
	if (err)
		return err;

	return nbytes;
}
|
|
|
|
/* "usage" read handler: current value of the cgroup's open-handles counter. */
static u64 files_usage_read(struct cgroup_subsys_state *css,
			    struct cftype *cft)
{
	return page_counter_read(css_res_open_handles(css));
}
|
|
static struct cftype files[] = {
|
|
{
|
|
.name = "limit",
|
|
.seq_show = files_limit_read,
|
|
.write = files_limit_write,
|
|
.flags = CFTYPE_NOT_ON_ROOT,
|
|
},
|
|
{
|
|
.name = "usage",
|
|
.read_u64 = files_usage_read,
|
|
},
|
|
{
|
|
.name = "no_acct",
|
|
.flags = CFTYPE_ONLY_ON_ROOT,
|
|
.read_u64 = files_disabled_read,
|
|
.write_u64 = files_disabled_write,
|
|
},
|
|
{ }
|
|
};
|
|
|
|
struct cgroup_subsys files_cgrp_subsys = {
|
|
.css_alloc = files_cgroup_css_alloc,
|
|
.css_free = files_cgroup_css_free,
|
|
.can_attach = files_cgroup_can_attach,
|
|
.legacy_cftypes = files,
|
|
};
|
|
|
|
/*
|
|
* It could race against cgroup migration of current task, and
|
|
* using task_get_css() to get a valid css.
|
|
*/
|
|
void files_cgroup_assign(struct files_struct *files)
|
|
{
|
|
struct cgroup_subsys_state *css;
|
|
|
|
if (!files_cgroup_enabled())
|
|
return;
|
|
|
|
if (files == &init_files)
|
|
return;
|
|
|
|
css = task_get_css(current, files_cgrp_id);
|
|
files->files_cgroup = container_of(css, struct files_cgroup, css);
|
|
}
|
|
|
|
void files_cgroup_remove(struct files_struct *files)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
struct files_cgroup *fcg;
|
|
|
|
if (!files_cgroup_enabled())
|
|
return;
|
|
|
|
if (files == &init_files)
|
|
return;
|
|
|
|
task_lock(tsk);
|
|
spin_lock(&files->file_lock);
|
|
fcg = files_cgroup_from_files(files);
|
|
css_put(&fcg->css);
|
|
spin_unlock(&files->file_lock);
|
|
task_unlock(tsk);
|
|
}
|