X Tutup
Skip to content

Commit b199438

Browse files
committed
core: use LSM BPF functions to implement RestrictFileSystems=
This commit attaches the LSM BPF program when the system manager starts up, populates the hash-of-maps BPF map when a service that has RestrictFileSystems= set is started, and removes the unit's entry from the hash-of-maps when the unit's cgroup is pruned. To pass the file descriptor of the BPF map on to the child process, it is added to the keep_fds array.
1 parent 184b4f7 commit b199438

File tree

7 files changed

+99
-1
lines changed

7 files changed

+99
-1
lines changed

src/basic/cgroup-util.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ typedef enum CGroupController {
3333
CGROUP_CONTROLLER_BPF_FOREIGN,
3434
CGROUP_CONTROLLER_BPF_SOCKET_BIND,
3535
CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
36+
/* The BPF hook implementing RestrictFileSystems= is not defined here.
37+
* It's applied as late as possible in exec_child() so we don't block
38+
* our own unit setup code. */
3639

3740
_CGROUP_CONTROLLER_MAX,
3841
_CGROUP_CONTROLLER_INVALID = -EINVAL,

src/core/cgroup.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@
3737
#include "string-util.h"
3838
#include "virt.h"
3939

40+
#if BPF_FRAMEWORK
41+
#include "bpf-dlopen.h"
42+
#include "bpf-link.h"
43+
#include "bpf/restrict_fs/restrict-fs-skel.h"
44+
#endif
45+
4046
#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
4147

4248
/* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
@@ -2736,6 +2742,10 @@ void unit_prune_cgroup(Unit *u) {
27362742

27372743
(void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
27382744

2745+
#if BPF_FRAMEWORK
2746+
(void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
2747+
#endif
2748+
27392749
is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
27402750

27412751
r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);

src/core/cgroup.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <stdbool.h>
55

6+
#include "bpf-lsm.h"
67
#include "cgroup-util.h"
78
#include "cpu-set-util.h"
89
#include "list.h"

src/core/execute.c

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#endif
4242
#include "async.h"
4343
#include "barrier.h"
44+
#include "bpf-lsm.h"
4445
#include "cap-list.h"
4546
#include "capability-util.h"
4647
#include "cgroup-setup.h"
@@ -1685,6 +1686,29 @@ static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
16851686
return seccomp_restrict_namespaces(c->restrict_namespaces);
16861687
}
16871688

1689+
#if HAVE_LIBBPF
1690+
/* Tells the caller whether an LSM-BPF-based setting has to be skipped.
 *
 * Returns false when lsm_bpf_supported() reports support. Otherwise logs a
 * debug message for unit 'u' that names the skipped setting ('msg') and
 * returns true. */
static bool skip_lsm_bpf_unsupported(const Unit* u, const char* msg) {
        bool unsupported = !lsm_bpf_supported();

        if (unsupported)
                log_unit_debug(u, "LSM BPF not supported, skipping %s", msg);

        return unsupported;
}
1697+
1698+
/* Applies the RestrictFileSystems= setting of exec context 'c' to unit 'u'.
 *
 * Returns 0 without doing anything when the setting is not configured, or
 * when LSM BPF is not supported (in which case a debug message is logged).
 * Otherwise returns the result of lsm_bpf_unit_restrict_filesystems(). */
static int apply_restrict_filesystems(Unit *u, const ExecContext *c) {
        assert(u);
        assert(c);

        /* The support check (and its debug log) only runs if the setting is actually in use. */
        if (exec_context_restrict_filesystems_set(c) &&
            !skip_lsm_bpf_unsupported(u, "RestrictFileSystems="))
                return lsm_bpf_unit_restrict_filesystems(u, c->restrict_filesystems, c->restrict_filesystems_allow_list);

        return 0;
}
1710+
#endif
1711+
16881712
static int apply_lock_personality(const Unit* u, const ExecContext *c) {
16891713
unsigned long personality;
16901714
int r;
@@ -3813,7 +3837,7 @@ static int exec_child(
38133837
/* In case anything used libc syslog(), close this here, too */
38143838
closelog();
38153839

3816-
int keep_fds[n_fds + 2];
3840+
int keep_fds[n_fds + 3];
38173841
memcpy_safe(keep_fds, fds, n_fds * sizeof(int));
38183842
n_keep_fds = n_fds;
38193843

@@ -3823,6 +3847,24 @@ static int exec_child(
38233847
return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
38243848
}
38253849

3850+
#if HAVE_LIBBPF
        /* For the system manager with LSM BPF available, look up the fd of the
         * RestrictFileSystems= BPF map and add it to keep_fds[] so that it is
         * handed to close_remaining_fds() along with the other fds to keep. */
        if (MANAGER_IS_SYSTEM(unit->manager) && lsm_bpf_supported()) {
                int bpf_map_fd = lsm_bpf_map_restrict_fs_fd(unit);
                if (bpf_map_fd < 0) {
                        *exit_status = EXIT_FDS;
                        /* Log the error returned by the lookup itself: 'r' still holds the
                         * result of an earlier, unrelated call at this point. */
                        return log_unit_error_errno(unit, bpf_map_fd, "Failed to get restrict filesystems BPF map fd: %m");
                }

                r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, bpf_map_fd, &bpf_map_fd);
                if (r < 0) {
                        *exit_status = EXIT_FDS;
                        return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
                }
        }
#endif
3867+
38263868
r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, keep_fds, n_keep_fds);
38273869
if (r < 0) {
38283870
*exit_status = EXIT_FDS;
@@ -4682,6 +4724,15 @@ static int exec_child(
46824724
return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
46834725
}
46844726
#endif
4727+
4728+
#if HAVE_LIBBPF
4729+
r = apply_restrict_filesystems(unit, context);
4730+
if (r < 0) {
4731+
*exit_status = EXIT_BPF;
4732+
return log_unit_error_errno(unit, r, "Failed to restrict filesystems: %m");
4733+
}
4734+
#endif
4735+
46854736
}
46864737

46874738
if (!strv_isempty(context->unset_environment)) {
@@ -4967,6 +5018,8 @@ void exec_context_done(ExecContext *c) {
49675018
c->apparmor_profile = mfree(c->apparmor_profile);
49685019
c->smack_process_label = mfree(c->smack_process_label);
49695020

5021+
c->restrict_filesystems = set_free(c->restrict_filesystems);
5022+
49705023
c->syscall_filter = hashmap_free(c->syscall_filter);
49715024
c->syscall_archs = set_free(c->syscall_archs);
49725025
c->address_families = set_free(c->address_families);
@@ -5734,6 +5787,12 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
57345787
prefix, strna(s));
57355788
}
57365789

5790+
#if HAVE_LIBBPF
        /* Dump one "RestrictFileSystems:" line per entry in the set. */
        if (exec_context_restrict_filesystems_set(c)) {
                /* SET_FOREACH() hands back each stored entry as-is, so the entry must be
                 * printed directly rather than dereferenced once more.
                 * NOTE(review): assumes the set stores the NUL-terminated filesystem
                 * names themselves — confirm against the code that fills the set. */
                char *fs;

                SET_FOREACH(fs, c->restrict_filesystems)
                        fprintf(f, "%sRestrictFileSystems: %s\n", prefix, fs);
        }
#endif
5795+
57375796
if (c->network_namespace_path)
57385797
fprintf(f,
57395798
"%sNetworkNamespacePath: %s\n",

src/core/execute.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,9 @@ struct ExecContext {
314314

315315
unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
316316

317+
Set *restrict_filesystems;
318+
bool restrict_filesystems_allow_list:1;
319+
317320
Hashmap *syscall_filter;
318321
Set *syscall_archs;
319322
int syscall_errno;
@@ -342,6 +345,13 @@ static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
342345
return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
343346
}
344347

348+
static inline bool exec_context_restrict_filesystems_set(const ExecContext *c) {
349+
assert(c);
350+
351+
return c->restrict_filesystems_allow_list ||
352+
!set_isempty(c->restrict_filesystems);
353+
}
354+
345355
static inline bool exec_context_with_rootfs(const ExecContext *c) {
346356
assert(c);
347357

src/core/main.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#include "alloc-util.h"
2323
#include "apparmor-setup.h"
2424
#include "architecture.h"
25+
#if HAVE_LIBBPF
26+
#include "bpf-lsm.h"
27+
#endif
2528
#include "build.h"
2629
#include "bus-error.h"
2730
#include "bus-util.h"

src/core/manager.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -930,6 +930,14 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
930930
r = manager_setup_sigchld_event_source(m);
931931
if (r < 0)
932932
return r;
933+
934+
#if HAVE_LIBBPF
935+
if (MANAGER_IS_SYSTEM(m) && lsm_bpf_supported()) {
936+
r = lsm_bpf_setup(m);
937+
if (r < 0)
938+
return r;
939+
}
940+
#endif
933941
}
934942

935943
if (test_run_flags == 0) {
@@ -1535,6 +1543,10 @@ Manager* manager_free(Manager *m) {
15351543
m->prefix[dt] = mfree(m->prefix[dt]);
15361544
free(m->received_credentials);
15371545

1546+
#if BPF_FRAMEWORK
1547+
lsm_bpf_destroy(m->restrict_fs);
1548+
#endif
1549+
15381550
return mfree(m);
15391551
}
15401552

0 commit comments

Comments
 (0)
X Tutup