@@ -299,8 +299,7 @@ static int acquire_managed_oom_connect(Manager *m) {
299299 return 0 ;
300300}
301301
302- static int monitor_cgroup_contexts_handler (sd_event_source * s , uint64_t usec , void * userdata ) {
303- _cleanup_set_free_ Set * targets = NULL ;
302+ static int monitor_swap_contexts_handler (sd_event_source * s , uint64_t usec , void * userdata ) {
304303 Manager * m = userdata ;
305304 usec_t usec_now ;
306305 int r ;
@@ -313,7 +312,7 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
313312 if (r < 0 )
314313 return log_error_errno (r , "Failed to reset event timer: %m" );
315314
316- r = sd_event_source_set_time_relative (s , INTERVAL_USEC );
315+ r = sd_event_source_set_time_relative (s , SWAP_INTERVAL_USEC );
317316 if (r < 0 )
318317 return log_error_errno (r , "Failed to set relative time for timer: %m" );
319318
@@ -324,13 +323,89 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
324323 return log_error_errno (r , "Failed to acquire varlink connection: %m" );
325324 }
326325
327- /* Update the cgroups used for detection/action */
328- r = update_monitored_cgroup_contexts (& m -> monitored_swap_cgroup_contexts );
329- if (r == - ENOMEM )
330- return log_oom ();
326+ /* We still try to acquire swap information for oomctl even if no units want swap monitoring */
327+ r = oomd_system_context_acquire ("/proc/swaps" , & m -> system_context );
328+ /* If there are no units depending on swap actions, the only error we exit on is ENOMEM.
329+ * Allow ENOENT in the event that swap is disabled on the system. */
330+ if (r == - ENOENT ) {
331+ zero (m -> system_context );
332+ return 0 ;
333+ } else if (r == - ENOMEM || (r < 0 && !hashmap_isempty (m -> monitored_swap_cgroup_contexts )))
334+ return log_error_errno (r , "Failed to acquire system context: %m" );
335+
336+ /* Return early if nothing is requesting swap monitoring */
337+ if (hashmap_isempty (m -> monitored_swap_cgroup_contexts ))
338+ return 0 ;
339+
340+ /* Note that m->monitored_swap_cgroup_contexts does not need to be updated every interval because only the
341+ * system context is used for deciding whether the swap threshold is hit. m->monitored_swap_cgroup_contexts
342+ * is only used to decide which cgroups to kill (and even then only the resource usages of its descendent
343+ * nodes are the ones that matter). */
344+
345+ if (oomd_swap_free_below (& m -> system_context , 10000 - m -> swap_used_limit_permyriad )) {
346+ _cleanup_hashmap_free_ Hashmap * candidates = NULL ;
347+ _cleanup_free_ char * selected = NULL ;
348+
349+ log_debug ("Swap used (%" PRIu64 ") / total (%" PRIu64 ") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR ,
350+ m -> system_context .swap_used , m -> system_context .swap_total ,
351+ PERMYRIAD_AS_PERCENT_FORMAT_VAL (m -> swap_used_limit_permyriad ));
352+
353+ r = get_monitored_cgroup_contexts_candidates (m -> monitored_swap_cgroup_contexts , & candidates );
354+ if (r == - ENOMEM )
355+ return log_oom ();
356+ if (r < 0 )
357+ log_debug_errno (r , "Failed to get monitored swap cgroup candidates, ignoring: %m" );
358+
359+ r = oomd_kill_by_swap_usage (candidates , m -> dry_run , & selected );
360+ if (r == - ENOMEM )
361+ return log_oom ();
362+ if (r < 0 )
363+ log_notice_errno (r , "Failed to kill any cgroup(s) based on swap: %m" );
364+ else {
365+ if (selected )
366+ log_notice ("Killed %s due to swap used (%" PRIu64 ") / total (%" PRIu64 ") being more than "
367+ PERMYRIAD_AS_PERCENT_FORMAT_STR ,
368+ selected , m -> system_context .swap_used , m -> system_context .swap_total ,
369+ PERMYRIAD_AS_PERCENT_FORMAT_VAL (m -> swap_used_limit_permyriad ));
370+ return 0 ;
371+ }
372+ }
373+
374+ return 0 ;
375+ }
376+
377+ static int monitor_memory_pressure_contexts_handler (sd_event_source * s , uint64_t usec , void * userdata ) {
378+ _cleanup_set_free_ Set * targets = NULL ;
379+ Manager * m = userdata ;
380+ usec_t usec_now ;
381+ int r ;
382+
383+ assert (s );
384+ assert (userdata );
385+
386+ /* Reset timer */
387+ r = sd_event_now (sd_event_source_get_event (s ), CLOCK_MONOTONIC , & usec_now );
331388 if (r < 0 )
332- log_debug_errno (r , "Failed to update monitored swap cgroup contexts, ignoring: %m" );
389+ return log_error_errno (r , "Failed to reset event timer: %m" );
390+
391+ r = sd_event_source_set_time_relative (s , MEM_PRESSURE_INTERVAL_USEC );
392+ if (r < 0 )
393+ return log_error_errno (r , "Failed to set relative time for timer: %m" );
394+
395+ /* Reconnect if our connection dropped */
396+ if (!m -> varlink ) {
397+ r = acquire_managed_oom_connect (m );
398+ if (r < 0 )
399+ return log_error_errno (r , "Failed to acquire varlink connection: %m" );
400+ }
333401
402+ /* Return early if nothing is requesting memory pressure monitoring */
403+ if (hashmap_isempty (m -> monitored_mem_pressure_cgroup_contexts )) {
404+ hashmap_clear (m -> monitored_mem_pressure_cgroup_contexts_candidates );
405+ return 0 ;
406+ }
407+
408+ /* Update the cgroups used for detection/action */
334409 r = update_monitored_cgroup_contexts (& m -> monitored_mem_pressure_cgroup_contexts );
335410 if (r == - ENOMEM )
336411 return log_oom ();
@@ -344,23 +419,16 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
344419 if (r < 0 )
345420 log_debug_errno (r , "Failed to update monitored memory pressure candidate cgroup contexts, ignoring: %m" );
346421
347- r = oomd_system_context_acquire ("/proc/swaps" , & m -> system_context );
348- /* If there aren't units depending on swap actions, the only error we exit on is ENOMEM.
349- * Allow ENOENT in the event that swap is disabled on the system. */
350- if (r == - ENOMEM || (r < 0 && r != - ENOENT && !hashmap_isempty (m -> monitored_swap_cgroup_contexts )))
351- return log_error_errno (r , "Failed to acquire system context: %m" );
352- else if (r == - ENOENT )
353- zero (m -> system_context );
354-
355422 if (oomd_memory_reclaim (m -> monitored_mem_pressure_cgroup_contexts ))
356423 m -> last_reclaim_at = usec_now ;
357424
358- /* If we're still recovering from a kill, don't try to kill again yet */
359- if (m -> post_action_delay_start > 0 ) {
360- if (m -> post_action_delay_start + POST_ACTION_DELAY_USEC > usec_now )
425+ /* Since pressure counters are lagging, we need to wait a bit after a kill to ensure we don't read stale
426+ * values and go on a kill storm. */
427+ if (m -> mem_pressure_post_action_delay_start > 0 ) {
428+ if (m -> mem_pressure_post_action_delay_start + POST_ACTION_DELAY_USEC > usec_now )
361429 return 0 ;
362430 else
363- m -> post_action_delay_start = 0 ;
431+ m -> mem_pressure_post_action_delay_start = 0 ;
364432 }
365433
366434 r = oomd_pressure_above (m -> monitored_mem_pressure_cgroup_contexts , m -> default_mem_pressure_duration_usec , & targets );
@@ -396,7 +464,7 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
396464 log_notice_errno (r , "Failed to kill any cgroup(s) under %s based on pressure: %m" , t -> path );
397465 else {
398466 /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
399- m -> post_action_delay_start = usec_now ;
467+ m -> mem_pressure_post_action_delay_start = usec_now ;
400468 if (selected )
401469 log_notice ("Killed %s due to memory pressure for %s being %lu.%02lu%% > %lu.%02lu%%"
402470 " for > %s with reclaim activity" ,
@@ -412,47 +480,42 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo
412480 }
413481 }
414482
415- if (oomd_swap_free_below (& m -> system_context , 10000 - m -> swap_used_limit_permyriad )) {
416- _cleanup_hashmap_free_ Hashmap * candidates = NULL ;
417- _cleanup_free_ char * selected = NULL ;
483+ return 0 ;
484+ }
418485
419- log_debug ( "Swap used (%" PRIu64 ") / total (%" PRIu64 ") is more than " PERMYRIAD_AS_PERCENT_FORMAT_STR ,
420- m -> system_context . swap_used , m -> system_context . swap_total ,
421- PERMYRIAD_AS_PERCENT_FORMAT_VAL ( m -> swap_used_limit_permyriad )) ;
486+ static int monitor_swap_contexts ( Manager * m ) {
487+ _cleanup_ ( sd_event_source_unrefp ) sd_event_source * s = NULL ;
488+ int r ;
422489
423- r = get_monitored_cgroup_contexts_candidates (m -> monitored_swap_cgroup_contexts , & candidates );
424- if (r == - ENOMEM )
425- return log_oom ();
426- if (r < 0 )
427- log_debug_errno (r , "Failed to get monitored swap cgroup candidates, ignoring: %m" );
490+ assert (m );
491+ assert (m -> event );
428492
429- r = oomd_kill_by_swap_usage (candidates , m -> dry_run , & selected );
430- if (r == - ENOMEM )
431- return log_oom ();
432- if (r < 0 )
433- log_notice_errno (r , "Failed to kill any cgroup(s) based on swap: %m" );
434- else {
435- m -> post_action_delay_start = usec_now ;
436- if (selected )
437- log_notice ("Killed %s due to swap used (%" PRIu64 ") / total (%" PRIu64 ") being more than "
438- PERMYRIAD_AS_PERCENT_FORMAT_STR ,
439- selected , m -> system_context .swap_used , m -> system_context .swap_total ,
440- PERMYRIAD_AS_PERCENT_FORMAT_VAL (m -> swap_used_limit_permyriad ));
441- return 0 ;
442- }
443- }
493+ r = sd_event_add_time (m -> event , & s , CLOCK_MONOTONIC , 0 , 0 , monitor_swap_contexts_handler , m );
494+ if (r < 0 )
495+ return r ;
496+
497+ r = sd_event_source_set_exit_on_failure (s , true);
498+ if (r < 0 )
499+ return r ;
444500
501+ r = sd_event_source_set_enabled (s , SD_EVENT_ON );
502+ if (r < 0 )
503+ return r ;
504+
505+ (void ) sd_event_source_set_description (s , "oomd-swap-timer" );
506+
507+ m -> swap_context_event_source = TAKE_PTR (s );
445508 return 0 ;
446509}
447510
448- static int monitor_cgroup_contexts (Manager * m ) {
511+ static int monitor_memory_pressure_contexts (Manager * m ) {
449512 _cleanup_ (sd_event_source_unrefp ) sd_event_source * s = NULL ;
450513 int r ;
451514
452515 assert (m );
453516 assert (m -> event );
454517
455- r = sd_event_add_time (m -> event , & s , CLOCK_MONOTONIC , 0 , 0 , monitor_cgroup_contexts_handler , m );
518+ r = sd_event_add_time (m -> event , & s , CLOCK_MONOTONIC , 0 , 0 , monitor_memory_pressure_contexts_handler , m );
456519 if (r < 0 )
457520 return r ;
458521
@@ -464,17 +527,18 @@ static int monitor_cgroup_contexts(Manager *m) {
464527 if (r < 0 )
465528 return r ;
466529
467- (void ) sd_event_source_set_description (s , "oomd-timer" );
530+ (void ) sd_event_source_set_description (s , "oomd-memory-pressure- timer" );
468531
469- m -> cgroup_context_event_source = TAKE_PTR (s );
532+ m -> mem_pressure_context_event_source = TAKE_PTR (s );
470533 return 0 ;
471534}
472535
473536Manager * manager_free (Manager * m ) {
474537 assert (m );
475538
476539 varlink_close_unref (m -> varlink );
477- sd_event_source_unref (m -> cgroup_context_event_source );
540+ sd_event_source_unref (m -> swap_context_event_source );
541+ sd_event_source_unref (m -> mem_pressure_context_event_source );
478542 sd_event_unref (m -> event );
479543
480544 bus_verify_polkit_async_registry_free (m -> polkit_registry );
@@ -596,7 +660,11 @@ int manager_start(
596660 if (r < 0 )
597661 return r ;
598662
599- r = monitor_cgroup_contexts (m );
663+ r = monitor_memory_pressure_contexts (m );
664+ if (r < 0 )
665+ return r ;
666+
667+ r = monitor_swap_contexts (m );
600668 if (r < 0 )
601669 return r ;
602670
0 commit comments