X Tutup
Skip to content

Commit d06e7fb

Browse files
committed
oomd: increase accuracy of SwapUsedLimit= to permyriads too
oomd.conf has two parameters that take fractional values: SwapUsedLimit= and DefaultMemoryPressureLimit=. For no apparent reason, one accepts permyriads while the other accepts only percentages, and one carries "Percent" in its name while the other doesn't. Let's clean this up: always accept permyriads, and drop the suffix, given that it is misleading. I figure we should internally try to scale everything relative to UINT32_MAX, or, if that isn't in the cards, at least relative to 10000, but never use permille or percent unless there's a really, really good reason for it (e.g. an interface defined by someone else).
1 parent d9d3f05 commit d06e7fb

File tree

8 files changed

+72
-62
lines changed

8 files changed

+72
-62
lines changed

man/oomd.conf.xml

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -48,36 +48,38 @@
4848

4949
<variablelist class='config-directives'>
5050
<varlistentry>
51-
<term><varname>SwapUsedLimitPercent=</varname></term>
52-
53-
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command> will
54-
take action. If the percentage of swap used on the system is more than what is defined here,
55-
<command>systemd-oomd</command> will act on eligible descendant cgroups, starting from the ones with the
56-
highest swap usage to the lowest swap usage. Which cgroups are monitored and what
57-
action gets taken depends on what the unit has configured for <varname>ManagedOOMSwap=</varname>.
58-
Takes a percentage value between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
51+
<term><varname>SwapUsedLimit=</varname></term>
52+
53+
<listitem><para>Sets the limit for swap usage on the system before <command>systemd-oomd</command>
54+
will take action. If the fraction of swap used on the system is more than what is defined here,
55+
<command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
56+
ones with the highest swap usage to the lowest swap usage. Which control groups are monitored and
57+
what action gets taken depends on what the unit has configured for
58+
<varname>ManagedOOMSwap=</varname>. Takes a value specified in percent (when suffixed with "%"),
59+
permille ("‰") or permyriad ("‱"), between 0% and 100%, inclusive. Defaults to 90%.</para></listitem>
5960
</varlistentry>
6061

6162
<varlistentry>
6263
<term><varname>DefaultMemoryPressureLimit=</varname></term>
6364

64-
<listitem><para>Sets the limit for memory pressure on the unit's cgroup before <command>systemd-oomd</command>
65-
will take action. A unit can override this value with <varname>ManagedOOMMemoryPressureLimit=</varname>.
66-
The memory pressure for this property represents the fraction of time in a 10 second window in which all tasks
67-
in the cgroup were delayed. For each monitored cgroup, if the memory pressure on that cgroup exceeds the
68-
limit set for longer than the duration set by <varname>DefaultMemoryPressureDurationSec=</varname>,
69-
<command>systemd-oomd</command> will act on eligible descendant cgroups,
70-
starting from the ones with the most reclaim activity to the least reclaim activity. Which cgroups are
71-
monitored and what action gets taken depends on what the unit has configured for
72-
<varname>ManagedOOMMemoryPressure=</varname>. Takes a percentage value between 0% and 100%, inclusive.
73-
Defaults to 60%.</para></listitem>
65+
<listitem><para>Sets the limit for memory pressure on the unit's control group before
66+
<command>systemd-oomd</command> will take action. A unit can override this value with
67+
<varname>ManagedOOMMemoryPressureLimit=</varname>. The memory pressure for this property represents
68+
the fraction of time in a 10 second window in which all tasks in the control group were delayed. For
69+
each monitored control group, if the memory pressure on that control group exceeds the limit set for
70+
longer than the duration set by <varname>DefaultMemoryPressureDurationSec=</varname>,
71+
<command>systemd-oomd</command> will act on eligible descendant control groups, starting from the
72+
ones with the most reclaim activity to the least reclaim activity. Which control groups are monitored
73+
and what action gets taken depends on what the unit has configured for
74+
<varname>ManagedOOMMemoryPressure=</varname>. Takes a fraction specified in the same way as
75+
<varname>SwapUsedLimit=</varname> above. Defaults to 60%.</para></listitem>
7476
</varlistentry>
7577

7678
<varlistentry>
7779
<term><varname>DefaultMemoryPressureDurationSec=</varname></term>
7880

79-
<listitem><para>Sets the amount of time a unit's cgroup needs to have exceeded memory pressure limits before
80-
<command>systemd-oomd</command> will take action. Memory pressure limits are defined by
81+
<listitem><para>Sets the amount of time a unit's control group needs to have exceeded memory pressure
82+
limits before <command>systemd-oomd</command> will take action. Memory pressure limits are defined by
8183
<varname>DefaultMemoryPressureLimit=</varname> and <varname>ManagedOOMMemoryPressureLimit=</varname>.
8284
Defaults to 30 seconds when this property is unset or set to 0.</para></listitem>
8385
</varlistentry>

src/oom/oomd-manager.c

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ typedef struct ManagedOOMReply {
1616
ManagedOOMMode mode;
1717
char *path;
1818
char *property;
19-
unsigned limit;
19+
uint32_t limit;
2020
} ManagedOOMReply;
2121

2222
static void managed_oom_reply_destroy(ManagedOOMReply *reply) {
@@ -53,10 +53,10 @@ static int process_managed_oom_reply(
5353
assert(m);
5454

5555
static const JsonDispatch dispatch_table[] = {
56-
{ "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY },
57-
{ "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY },
58-
{ "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY },
59-
{ "limit", JSON_VARIANT_UNSIGNED, json_dispatch_unsigned, offsetof(ManagedOOMReply, limit), 0 },
56+
{ "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY },
57+
{ "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY },
58+
{ "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY },
59+
{ "limit", JSON_VARIANT_UNSIGNED, json_dispatch_uint32, offsetof(ManagedOOMReply, limit), 0 },
6060
{},
6161
};
6262

@@ -87,7 +87,8 @@ static int process_managed_oom_reply(
8787
if (ret == -ENOMEM) {
8888
r = ret;
8989
goto finish;
90-
} else if (ret < 0)
90+
}
91+
if (ret < 0)
9192
continue;
9293

9394
monitor_hm = streq(reply.property, "ManagedOOMSwap") ?
@@ -100,19 +101,15 @@ static int process_managed_oom_reply(
100101

101102
limit = m->default_mem_pressure_limit;
102103

103-
if (streq(reply.property, "ManagedOOMMemoryPressure")) {
104-
if (reply.limit > UINT32_MAX) /* out of range */
104+
if (streq(reply.property, "ManagedOOMMemoryPressure") && reply.limit > 0) {
105+
int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit);
106+
107+
ret = store_loadavg_fixed_point(
108+
(unsigned long) permyriad / 100,
109+
(unsigned long) permyriad % 100,
110+
&limit);
111+
if (ret < 0)
105112
continue;
106-
if (reply.limit != 0) {
107-
int permyriad = UINT32_SCALE_TO_PERMYRIAD(reply.limit);
108-
109-
ret = store_loadavg_fixed_point(
110-
(unsigned long) permyriad / 100,
111-
(unsigned long) permyriad % 100,
112-
&limit);
113-
if (ret < 0)
114-
continue;
115-
}
116113
}
117114

118115
ret = oomd_insert_cgroup_context(NULL, monitor_hm, empty_to_root(reply.path));
@@ -354,11 +351,11 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
354351
}
355352
}
356353

357-
if (oomd_swap_free_below(&m->system_context, (100 - m->swap_used_limit))) {
354+
if (oomd_swap_free_below(&m->system_context, 10000 - m->swap_used_limit_permyriad)) {
358355
_cleanup_hashmap_free_ Hashmap *candidates = NULL;
359356

360-
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than %u%%",
361-
m->system_context.swap_used, m->system_context.swap_total, m->swap_used_limit);
357+
log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR,
358+
m->system_context.swap_used, m->system_context.swap_total, PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad));
362359

363360
r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
364361
if (r == -ENOMEM)
@@ -484,18 +481,24 @@ static int manager_connect_bus(Manager *m) {
484481
return 0;
485482
}
486483

487-
int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec) {
484+
int manager_start(
485+
Manager *m,
486+
bool dry_run,
487+
int swap_used_limit_permyriad,
488+
int mem_pressure_limit_permyriad,
489+
usec_t mem_pressure_usec) {
490+
488491
unsigned long l, f;
489492
int r;
490493

491494
assert(m);
492495

493496
m->dry_run = dry_run;
494497

495-
m->swap_used_limit = swap_used_limit != -1 ? swap_used_limit : DEFAULT_SWAP_USED_LIMIT;
496-
assert(m->swap_used_limit <= 100);
498+
m->swap_used_limit_permyriad = swap_used_limit_permyriad >= 0 ? swap_used_limit_permyriad : DEFAULT_SWAP_USED_LIMIT_PERCENT * 100;
499+
assert(m->swap_used_limit_permyriad <= 10000);
497500

498-
if (mem_pressure_limit_permyriad != -1) {
501+
if (mem_pressure_limit_permyriad >= 0) {
499502
assert(mem_pressure_limit_permyriad <= 10000);
500503

501504
l = mem_pressure_limit_permyriad / 100;
@@ -543,12 +546,12 @@ int manager_get_dump_string(Manager *m, char **ret) {
543546

544547
fprintf(f,
545548
"Dry Run: %s\n"
546-
"Swap Used Limit: %u%%\n"
549+
"Swap Used Limit: " PERMYRIAD_AS_PERCENT_FORMAT_STR "\n"
547550
"Default Memory Pressure Limit: %lu.%02lu%%\n"
548551
"Default Memory Pressure Duration: %s\n"
549552
"System Context:\n",
550553
yes_no(m->dry_run),
551-
m->swap_used_limit,
554+
PERMYRIAD_AS_PERCENT_FORMAT_VAL(m->swap_used_limit_permyriad),
552555
LOAD_INT(m->default_mem_pressure_limit), LOAD_FRAC(m->default_mem_pressure_limit),
553556
format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC));
554557
oomd_dump_system_context(&m->system_context, f, "\t");

src/oom/oomd-manager.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* system.slice are assumed to be less latency sensitive. */
1919
#define DEFAULT_MEM_PRESSURE_DURATION_USEC (30 * USEC_PER_SEC)
2020
#define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60
21-
#define DEFAULT_SWAP_USED_LIMIT 90
21+
#define DEFAULT_SWAP_USED_LIMIT_PERCENT 90
2222

2323
#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC)
2424
#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)
@@ -32,7 +32,7 @@ struct Manager {
3232
Hashmap *polkit_registry;
3333

3434
bool dry_run;
35-
unsigned swap_used_limit;
35+
int swap_used_limit_permyriad;
3636
loadavg_t default_mem_pressure_limit;
3737
usec_t default_mem_pressure_duration_usec;
3838

@@ -56,7 +56,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
5656

5757
int manager_new(Manager **ret);
5858

59-
int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec);
59+
int manager_start(Manager *m, bool dry_run, int swap_used_limit_permyriad, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec);
6060

6161
int manager_get_dump_string(Manager *m, char **ret);
6262

src/oom/oomd-util.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,13 @@ bool oomd_memory_reclaim(Hashmap *h) {
134134
return pgscan_of > last_pgscan_of;
135135
}
136136

137-
bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent) {
137+
bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad) {
138138
uint64_t swap_threshold;
139139

140140
assert(ctx);
141-
assert(threshold_percent <= 100);
141+
assert(threshold_permyriad <= 10000);
142142

143-
swap_threshold = ctx->swap_total * threshold_percent / ((uint64_t) 100);
143+
swap_threshold = ctx->swap_total * threshold_permyriad / (uint64_t) 10000;
144144
return (ctx->swap_total - ctx->swap_used) < swap_threshold;
145145
}
146146

src/oom/oomd-util.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret);
6161
* current sum is higher than the last interval's sum (there was some reclaim activity). */
6262
bool oomd_memory_reclaim(Hashmap *h);
6363

64-
/* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */
65-
bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent);
64+
/* Returns true if the amount of swap free is below the permyriad of swap specified by `threshold_permyriad`. */
65+
bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad);
6666

6767
/* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end
6868
* (after the smallest values). */

src/oom/oomd.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
#include "signal-util.h"
1818

1919
static bool arg_dry_run = false;
20-
static int arg_swap_used_limit = -1;
20+
static int arg_swap_used_limit_permyriad = -1;
2121
static int arg_mem_pressure_limit_permyriad = -1;
2222
static usec_t arg_mem_pressure_usec = 0;
2323

2424
static int parse_config(void) {
2525
static const ConfigTableItem items[] = {
26-
{ "OOM", "SwapUsedLimitPercent", config_parse_percent, 0, &arg_swap_used_limit },
26+
{ "OOM", "SwapUsedLimit", config_parse_permyriad, 0, &arg_swap_used_limit_permyriad },
2727
{ "OOM", "DefaultMemoryPressureLimit", config_parse_permyriad, 0, &arg_mem_pressure_limit_permyriad },
2828
{ "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec },
2929
{}
@@ -159,7 +159,12 @@ static int run(int argc, char *argv[]) {
159159
if (r < 0)
160160
return log_error_errno(r, "Failed to create manager: %m");
161161

162-
r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit_permyriad, arg_mem_pressure_usec);
162+
r = manager_start(
163+
m,
164+
arg_dry_run,
165+
arg_swap_used_limit_permyriad,
166+
arg_mem_pressure_limit_permyriad,
167+
arg_mem_pressure_usec);
163168
if (r < 0)
164169
return log_error_errno(r, "Failed to start up daemon: %m");
165170

src/oom/oomd.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@
1212
# See oomd.conf(5) for details
1313

1414
[OOM]
15-
#SwapUsedLimitPercent=90%
15+
#SwapUsedLimit=90%
1616
#DefaultMemoryPressureLimit=60%
1717
#DefaultMemoryPressureDurationSec=30s

src/oom/test-oomd-util.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,19 +302,19 @@ static void test_oomd_swap_free_below(void) {
302302
.swap_total = 20971512 * 1024U,
303303
.swap_used = 20971440 * 1024U,
304304
};
305-
assert_se(oomd_swap_free_below(&ctx, 20) == true);
305+
assert_se(oomd_swap_free_below(&ctx, 2000) == true);
306306

307307
ctx = (OomdSystemContext) {
308308
.swap_total = 20971512 * 1024U,
309309
.swap_used = 3310136 * 1024U,
310310
};
311-
assert_se(oomd_swap_free_below(&ctx, 20) == false);
311+
assert_se(oomd_swap_free_below(&ctx, 2000) == false);
312312

313313
ctx = (OomdSystemContext) {
314314
.swap_total = 0,
315315
.swap_used = 0,
316316
};
317-
assert_se(oomd_swap_free_below(&ctx, 20) == false);
317+
assert_se(oomd_swap_free_below(&ctx, 2000) == false);
318318
}
319319

320320
static void test_oomd_sort_cgroups(void) {

0 commit comments

Comments
 (0)
X Tutup