X Tutup
Skip to content

Commit 3a0f06c

Browse files
committed
core: make TasksMax a partially dynamic property
TasksMax= and DefaultTasksMax= can be specified as percentages. We don't actually document what the percentage is relative to, but the implementation uses the smallest of /proc/sys/kernel/pid_max, /proc/sys/kernel/threads-max, and /sys/fs/cgroup/pids.max (when present). When the value is a percentage, we immediately convert it to an absolute value. If the limit later changes (which can happen e.g. when systemd-sysctl runs), the absolute value becomes outdated. So let's store either the percentage or the absolute value, whichever was specified, and only convert to an absolute value when the value is used. For example, when starting a unit, the absolute value will be calculated when the cgroup for the unit is created. Fixes systemd#13419.
1 parent a650e19 commit 3a0f06c

File tree

12 files changed

+128
-29
lines changed

12 files changed

+128
-29
lines changed

src/basic/cgroup-util.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,6 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
130130
}
131131

132132
/* Default resource limits */
133-
#define DEFAULT_TASKS_MAX_PERCENTAGE 15U /* 15% of PIDs, 4915 on default settings */
134133
#define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */
135134

136135
typedef enum CGroupUnified {

src/basic/limits-util.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,6 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
9999
}
100100

101101
uint64_t system_tasks_max(void) {
102-
103102
uint64_t a = TASKS_MAX, b = TASKS_MAX;
104103
_cleanup_free_ char *root = NULL;
105104
int r;

src/core/cgroup.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "fd-util.h"
1717
#include "fileio.h"
1818
#include "fs-util.h"
19+
#include "limits-util.h"
1920
#include "parse-util.h"
2021
#include "path-util.h"
2122
#include "process-util.h"
@@ -34,6 +35,13 @@
3435
* out specific attributes from us. */
3536
#define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING)
3637

38+
uint64_t tasks_max_resolve(const TasksMax *tasks_max) {
39+
if (tasks_max->scale == 0)
40+
return tasks_max->value;
41+
42+
return system_tasks_max_scale(tasks_max->value, tasks_max->scale);
43+
}
44+
3745
bool manager_owns_host_root_cgroup(Manager *m) {
3846
assert(m);
3947

@@ -117,7 +125,7 @@ void cgroup_context_init(CGroupContext *c) {
117125
.blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
118126
.startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
119127

120-
.tasks_max = CGROUP_LIMIT_MAX,
128+
.tasks_max = TASKS_MAX_UNSET,
121129
};
122130
}
123131

@@ -447,7 +455,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
447455
prefix, c->memory_max, format_cgroup_memory_limit_comparison(cdd, sizeof(cdd), u, "MemoryMax"),
448456
prefix, c->memory_swap_max, format_cgroup_memory_limit_comparison(cde, sizeof(cde), u, "MemorySwapMax"),
449457
prefix, c->memory_limit,
450-
prefix, c->tasks_max,
458+
prefix, tasks_max_resolve(&c->tasks_max),
451459
prefix, cgroup_device_policy_to_string(c->device_policy),
452460
prefix, strempty(disable_controllers_str),
453461
prefix, yes_no(c->delegate));
@@ -1339,9 +1347,9 @@ static void cgroup_context_apply(
13391347
* which is desirable so that there's an official way to release control of the sysctl from
13401348
* systemd: set the limit to unbounded and reload. */
13411349

1342-
if (c->tasks_max != CGROUP_LIMIT_MAX) {
1350+
if (tasks_max_isset(&c->tasks_max)) {
13431351
u->manager->sysctl_pid_max_changed = true;
1344-
r = procfs_tasks_set_limit(c->tasks_max);
1352+
r = procfs_tasks_set_limit(tasks_max_resolve(&c->tasks_max));
13451353
} else if (u->manager->sysctl_pid_max_changed)
13461354
r = procfs_tasks_set_limit(TASKS_MAX);
13471355
else
@@ -1354,10 +1362,10 @@ static void cgroup_context_apply(
13541362
/* The attribute itself is not available on the host root cgroup, and in the container case we want to
13551363
* leave it for the container manager. */
13561364
if (!is_local_root) {
1357-
if (c->tasks_max != CGROUP_LIMIT_MAX) {
1358-
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
1365+
if (tasks_max_isset(&c->tasks_max)) {
1366+
char buf[DECIMAL_STR_MAX(uint64_t) + 1];
13591367

1360-
sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
1368+
xsprintf(buf, "%" PRIu64 "\n", tasks_max_resolve(&c->tasks_max));
13611369
(void) set_attribute_and_warn(u, "pids", "pids.max", buf);
13621370
} else
13631371
(void) set_attribute_and_warn(u, "pids", "pids.max", "max\n");
@@ -1434,7 +1442,7 @@ static CGroupMask unit_get_cgroup_mask(Unit *u) {
14341442
mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
14351443

14361444
if (c->tasks_accounting ||
1437-
c->tasks_max != CGROUP_LIMIT_MAX)
1445+
tasks_max_isset(&c->tasks_max))
14381446
mask |= CGROUP_MASK_PIDS;
14391447

14401448
return CGROUP_MASK_EXTEND_JOINED(mask);

src/core/cgroup.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,21 @@
99
#include "list.h"
1010
#include "time-util.h"
1111

12+
typedef struct TasksMax {
13+
/* If scale == 0, value is the absolute limit; otherwise the limit is the fraction value / scale of the system-wide tasks maximum.
14+
* See tasks_max_resolve(). */
15+
uint64_t value;
16+
uint64_t scale;
17+
} TasksMax;
18+
19+
#define TASKS_MAX_UNSET ((TasksMax) { .value = UINT64_MAX, .scale = 0 })
20+
21+
static inline bool tasks_max_isset(const TasksMax *tasks_max) {
22+
return tasks_max->value != UINT64_MAX || tasks_max->scale != 0;
23+
}
24+
25+
uint64_t tasks_max_resolve(const TasksMax *tasks_max);
26+
1227
typedef struct CGroupContext CGroupContext;
1328
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
1429
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
@@ -135,7 +150,7 @@ struct CGroupContext {
135150
LIST_HEAD(CGroupDeviceAllow, device_allow);
136151

137152
/* Common */
138-
uint64_t tasks_max;
153+
TasksMax tasks_max;
139154
};
140155

141156
/* Used when querying IP accounting data */

src/core/dbus-cgroup.c

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "limits-util.h"
1717
#include "path-util.h"
1818

19+
BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_resolve);
20+
1921
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
2022

2123
static int property_get_cgroup_mask(
@@ -382,7 +384,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
382384
SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
383385
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
384386
SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
385-
SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
387+
SD_BUS_PROPERTY("TasksMax", "t", bus_property_get_tasks_max, offsetof(CGroupContext, tasks_max), 0),
386388
SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
387389
SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
388390
SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
@@ -715,9 +717,76 @@ BUS_DEFINE_SET_CGROUP_WEIGHT(blockio_weight, CGROUP_MASK_BLKIO, CGROUP_BLKIO_WEI
715717
BUS_DEFINE_SET_CGROUP_LIMIT(memory, CGROUP_MASK_MEMORY, physical_memory_scale, 1);
716718
BUS_DEFINE_SET_CGROUP_LIMIT(memory_protection, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
717719
BUS_DEFINE_SET_CGROUP_LIMIT(swap, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
718-
BUS_DEFINE_SET_CGROUP_LIMIT(tasks_max, CGROUP_MASK_PIDS, system_tasks_max_scale, 1);
719720
#pragma GCC diagnostic pop
720721

722+
static int bus_cgroup_set_tasks_max(
723+
Unit *u,
724+
const char *name,
725+
TasksMax *p,
726+
sd_bus_message *message,
727+
UnitWriteFlags flags,
728+
sd_bus_error *error) {
729+
730+
uint64_t v;
731+
int r;
732+
733+
assert(p);
734+
735+
r = sd_bus_message_read(message, "t", &v);
736+
if (r < 0)
737+
return r;
738+
739+
if (v < 1)
740+
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
741+
"Value specified in %s is out of range", name);
742+
743+
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
744+
*p = (TasksMax) { .value = v, .scale = 0 }; /* When .scale==0, .value is the absolute value */
745+
unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
746+
747+
if (v == CGROUP_LIMIT_MAX)
748+
unit_write_settingf(u, flags, name,
749+
"%s=infinity", name);
750+
else
751+
unit_write_settingf(u, flags, name,
752+
"%s=%" PRIu64, name, v);
753+
}
754+
755+
return 1;
756+
}
757+
758+
static int bus_cgroup_set_tasks_max_scale(
759+
Unit *u,
760+
const char *name,
761+
TasksMax *p,
762+
sd_bus_message *message,
763+
UnitWriteFlags flags,
764+
sd_bus_error *error) {
765+
766+
uint32_t v;
767+
int r;
768+
769+
assert(p);
770+
771+
r = sd_bus_message_read(message, "u", &v);
772+
if (r < 0)
773+
return r;
774+
775+
if (v < 1 || v >= UINT32_MAX)
776+
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
777+
"Value specified in %s is out of range", name);
778+
779+
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
780+
*p = (TasksMax) { v, UINT32_MAX }; /* .scale is not 0, so this is interpreted as v/UINT32_MAX. */
781+
unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
782+
783+
unit_write_settingf(u, flags, name, "%s=%" PRIu32 "%%", "TasksMax",
784+
(uint32_t) (DIV_ROUND_UP((uint64_t) v * 100U, (uint64_t) UINT32_MAX)));
785+
}
786+
787+
return 1;
788+
}
789+
721790
int bus_cgroup_set_property(
722791
Unit *u,
723792
CGroupContext *c,

src/core/dbus-cgroup.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@
99

1010
extern const sd_bus_vtable bus_cgroup_vtable[];
1111

12+
int bus_property_get_tasks_max(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
13+
1214
int bus_cgroup_set_property(Unit *u, CGroupContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);

src/core/dbus-manager.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "architecture.h"
1010
#include "build.h"
1111
#include "bus-common-errors.h"
12+
#include "dbus-cgroup.h"
1213
#include "dbus-execute.h"
1314
#include "dbus-job.h"
1415
#include "dbus-manager.h"
@@ -2465,7 +2466,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
24652466
SD_BUS_PROPERTY("DefaultLimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
24662467
SD_BUS_PROPERTY("DefaultLimitRTTIME", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
24672468
SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
2468-
SD_BUS_PROPERTY("DefaultTasksMax", "t", NULL, offsetof(Manager, default_tasks_max), SD_BUS_VTABLE_PROPERTY_CONST),
2469+
SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0),
24692470
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
24702471
SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
24712472

src/core/load-fragment.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3263,36 +3263,39 @@ int config_parse_tasks_max(
32633263
void *data,
32643264
void *userdata) {
32653265

3266-
uint64_t *tasks_max = data, v;
32673266
const Unit *u = userdata;
3267+
TasksMax *tasks_max = data;
3268+
uint64_t v;
32683269
int r;
32693270

32703271
if (isempty(rvalue)) {
3271-
*tasks_max = u ? u->manager->default_tasks_max : UINT64_MAX;
3272+
*tasks_max = u ? u->manager->default_tasks_max : TASKS_MAX_UNSET;
32723273
return 0;
32733274
}
32743275

32753276
if (streq(rvalue, "infinity")) {
3276-
*tasks_max = CGROUP_LIMIT_MAX;
3277+
*tasks_max = TASKS_MAX_UNSET;
32773278
return 0;
32783279
}
32793280

32803281
r = parse_permille(rvalue);
3281-
if (r < 0) {
3282+
if (r >= 0)
3283+
*tasks_max = (TasksMax) { r, 1000U }; /* r‰ */
3284+
else {
32823285
r = safe_atou64(rvalue, &v);
32833286
if (r < 0) {
32843287
log_syntax(unit, LOG_ERR, filename, line, r, "Invalid maximum tasks value '%s', ignoring: %m", rvalue);
32853288
return 0;
32863289
}
3287-
} else
3288-
v = system_tasks_max_scale(r, 1000U);
32893290

3290-
if (v <= 0 || v >= UINT64_MAX) {
3291-
log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
3292-
return 0;
3291+
if (v <= 0 || v >= UINT64_MAX) {
3292+
log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
3293+
return 0;
3294+
}
3295+
3296+
*tasks_max = (TasksMax) { v };
32933297
}
32943298

3295-
*tasks_max = v;
32963299
return 0;
32973300
}
32983301

src/core/main.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484
#include <sanitizer/lsan_interface.h>
8585
#endif
8686

87+
#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
88+
8789
static enum {
8890
ACTION_RUN,
8991
ACTION_HELP,
@@ -135,7 +137,7 @@ static bool arg_default_ip_accounting;
135137
static bool arg_default_blockio_accounting;
136138
static bool arg_default_memory_accounting;
137139
static bool arg_default_tasks_accounting;
138-
static uint64_t arg_default_tasks_max;
140+
static TasksMax arg_default_tasks_max;
139141
static sd_id128_t arg_machine_id;
140142
static EmergencyAction arg_cad_burst_action;
141143
static OOMPolicy arg_default_oom_policy;
@@ -2131,7 +2133,7 @@ static void reset_arguments(void) {
21312133
arg_default_blockio_accounting = false;
21322134
arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
21332135
arg_default_tasks_accounting = true;
2134-
arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
2136+
arg_default_tasks_max = DEFAULT_TASKS_MAX;
21352137
arg_machine_id = (sd_id128_t) {};
21362138
arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
21372139
arg_default_oom_policy = OOM_STOP;

src/core/manager.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -762,7 +762,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
762762
.default_timer_accuracy_usec = USEC_PER_MINUTE,
763763
.default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT,
764764
.default_tasks_accounting = true,
765-
.default_tasks_max = UINT64_MAX,
765+
.default_tasks_max = TASKS_MAX_UNSET,
766766
.default_timeout_start_usec = DEFAULT_TIMEOUT_USEC,
767767
.default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC,
768768
.default_restart_usec = DEFAULT_RESTART_USEC,

0 commit comments

Comments
 (0)
X Tutup