mirror of
https://git.savannah.gnu.org/git/guix.git
synced 2026-04-06 13:10:33 +02:00
build/syscalls: Introduce new safe-clone and use it.
* guix/build/syscalls.scm (without-automatic-finalization): Accept multiple
expressions.
(without-garbage-collection): New syntax.
(without-threads): Likewise.
(ensure-signal-delivery-thread, safe-clone): New procedures.
* tests/syscalls.scm: ("clone and unshare triggers EINVAL")
("safe-clone and unshare succeeds"): New tests.
* gnu/build/linux-container.scm (run-container): Adjust to use 'safe-clone'.
Relates-to: #1169
Change-Id: I044c11a899e24e547a7aed97f30c8e7250ab5363
This commit is contained in:
@@ -263,100 +263,93 @@ that host UIDs (respectively GIDs) map to in the namespace."
|
|||||||
;; child process blocks until the parent writes to it.
|
;; child process blocks until the parent writes to it.
|
||||||
(match (socketpair PF_UNIX (logior SOCK_CLOEXEC SOCK_STREAM) 0)
|
(match (socketpair PF_UNIX (logior SOCK_CLOEXEC SOCK_STREAM) 0)
|
||||||
((child . parent)
|
((child . parent)
|
||||||
(let ((flags (namespaces->bit-mask namespaces)))
|
(safe-clone
|
||||||
(match (clone flags)
|
(namespaces->bit-mask namespaces)
|
||||||
(0
|
(lambda ()
|
||||||
;; Inhibit thread creation until after the unshare call.
|
(call-with-clean-exit
|
||||||
(gc-disable)
|
(lambda ()
|
||||||
(call-with-clean-exit
|
(close-port parent)
|
||||||
(lambda ()
|
;; Wait for parent to set things up.
|
||||||
(close-port parent)
|
(match (read child)
|
||||||
;; Wait for parent to set things up.
|
('ready
|
||||||
(match (read child)
|
(purify-environment)
|
||||||
('ready
|
(when (and (memq 'mnt namespaces)
|
||||||
(purify-environment)
|
(not (string=? root "/")))
|
||||||
(when (and (memq 'mnt namespaces)
|
(catch #t
|
||||||
(not (string=? root "/")))
|
(lambda ()
|
||||||
(catch #t
|
(mount-file-systems root mounts
|
||||||
(lambda ()
|
#:mount-/proc? (memq 'pid namespaces)
|
||||||
(mount-file-systems root mounts
|
#:mount-/sys? (memq 'net
|
||||||
#:mount-/proc? (memq 'pid namespaces)
|
namespaces)
|
||||||
#:mount-/sys? (memq 'net
|
#:populate-file-system
|
||||||
namespaces)
|
(lambda ()
|
||||||
#:populate-file-system
|
(populate-file-system)
|
||||||
(lambda ()
|
(when (and (memq 'net namespaces)
|
||||||
(populate-file-system)
|
loopback-network?)
|
||||||
(when (and (memq 'net namespaces)
|
(set-network-interface-up "lo")
|
||||||
loopback-network?)
|
|
||||||
(set-network-interface-up "lo")
|
|
||||||
|
|
||||||
;; When isolated from the
|
;; When isolated from the
|
||||||
;; network, provide a minimal
|
;; network, provide a minimal
|
||||||
;; /etc/hosts to resolve
|
;; /etc/hosts to resolve
|
||||||
;; "localhost".
|
;; "localhost".
|
||||||
(mkdir-p "/etc")
|
(mkdir-p "/etc")
|
||||||
(call-with-output-file "/etc/hosts"
|
(call-with-output-file "/etc/hosts"
|
||||||
(lambda (port)
|
(lambda (port)
|
||||||
(display "127.0.0.1 localhost\n" port)
|
(display "127.0.0.1 localhost\n" port)
|
||||||
(chmod port #o444)))))
|
(chmod port #o444)))))
|
||||||
#:writable-root?
|
#:writable-root?
|
||||||
(or writable-root?
|
(or writable-root?
|
||||||
(not (memq 'mnt namespaces)))))
|
(not (memq 'mnt namespaces)))))
|
||||||
(lambda args
|
(lambda args
|
||||||
;; Forward the exception to the parent process.
|
;; Forward the exception to the parent process.
|
||||||
;; FIXME: SRFI-35 conditions and non-trivial objects
|
;; FIXME: SRFI-35 conditions and non-trivial objects
|
||||||
;; cannot be 'read' so they shouldn't be written as is.
|
;; cannot be 'read' so they shouldn't be written as is.
|
||||||
(write args child)
|
(write args child)
|
||||||
(primitive-exit 3))))
|
(primitive-exit 3))))
|
||||||
|
|
||||||
(when (and lock-mounts?
|
(when (and lock-mounts?
|
||||||
(memq 'mnt namespaces)
|
(memq 'mnt namespaces)
|
||||||
(memq 'user namespaces))
|
(memq 'user namespaces))
|
||||||
;; Create a new mount namespace owned by a new user
|
;; Create a new mount namespace owned by a new user
|
||||||
;; namespace to "lock" together previous mounts, such that
|
;; namespace to "lock" together previous mounts, such that
|
||||||
;; they cannot be unmounted or remounted separately--see
|
;; they cannot be unmounted or remounted separately--see
|
||||||
;; mount_namespaces(7).
|
;; mount_namespaces(7).
|
||||||
;;
|
(let ((uid (getuid)) (gid (getgid)))
|
||||||
;; Note: at this point, the process is single-threaded (no
|
(unshare (logior CLONE_NEWUSER CLONE_NEWNS))
|
||||||
;; GC mark threads, no finalization thread, etc.) which is
|
(when (file-exists? "/proc/self")
|
||||||
;; why unshare(CLONE_NEWUSER) can be used.
|
(initialize-user-namespace (getpid)
|
||||||
(let ((uid (getuid)) (gid (getgid)))
|
host-uids
|
||||||
(unshare (logior CLONE_NEWUSER CLONE_NEWNS))
|
#:host-uid uid
|
||||||
(gc-enable)
|
#:host-gid gid
|
||||||
(when (file-exists? "/proc/self")
|
#:guest-uid guest-uid
|
||||||
(initialize-user-namespace (getpid)
|
#:guest-gid guest-gid))))
|
||||||
host-uids
|
|
||||||
#:host-uid uid
|
|
||||||
#:host-gid gid
|
|
||||||
#:guest-uid guest-uid
|
|
||||||
#:guest-gid guest-gid))))
|
|
||||||
|
|
||||||
;; TODO: Manage capabilities.
|
;; TODO: Manage capabilities.
|
||||||
(write 'ready child)
|
(write 'ready child)
|
||||||
(close-port child)
|
(close-port child)
|
||||||
(thunk))
|
(thunk))
|
||||||
(_ ;parent died or something
|
(_ ;parent died or something
|
||||||
(primitive-exit 2))))))
|
(primitive-exit 2))))))
|
||||||
(pid
|
(lambda (pid)
|
||||||
(close-port child)
|
(close-port child)
|
||||||
(when (memq 'user namespaces)
|
(when (memq 'user namespaces)
|
||||||
(initialize-user-namespace pid host-uids
|
(initialize-user-namespace pid host-uids
|
||||||
#:guest-uid guest-uid
|
#:guest-uid guest-uid
|
||||||
#:guest-gid guest-gid))
|
#:guest-gid guest-gid))
|
||||||
;; TODO: Initialize cgroups.
|
;; TODO: Initialize cgroups.
|
||||||
(write 'ready parent)
|
(write 'ready parent)
|
||||||
(newline parent)
|
(newline parent)
|
||||||
|
|
||||||
;; Check whether the child process' setup phase succeeded.
|
;; Check whether the child process' setup phase succeeded.
|
||||||
(let ((message (read parent)))
|
(let ((message (read parent)))
|
||||||
(close-port parent)
|
(close-port parent)
|
||||||
(match message
|
(match message
|
||||||
('ready ;success
|
('ready ;success
|
||||||
pid)
|
pid)
|
||||||
(((? symbol? key) args ...) ;exception
|
(((? symbol? key) args ...) ;exception
|
||||||
(apply throw key args))
|
(apply throw key args))
|
||||||
(_ ;unexpected termination
|
(_ ;unexpected termination
|
||||||
#f)))))))))
|
#f))))))))
|
||||||
|
|
||||||
;; FIXME: This is copied from (guix utils), which we cannot use because it
|
;; FIXME: This is copied from (guix utils), which we cannot use because it
|
||||||
;; would pull (guix config) and all.
|
;; would pull (guix config) and all.
|
||||||
|
|||||||
@@ -150,6 +150,7 @@
|
|||||||
CLONE_THREAD
|
CLONE_THREAD
|
||||||
CLONE_VM
|
CLONE_VM
|
||||||
clone
|
clone
|
||||||
|
safe-clone
|
||||||
unshare
|
unshare
|
||||||
setns
|
setns
|
||||||
get-user-ns
|
get-user-ns
|
||||||
@@ -1170,17 +1171,45 @@ caller lacks root privileges."
|
|||||||
Turning finalization off shuts down the finalization thread as a side effect."
|
Turning finalization off shuts down the finalization thread as a side effect."
|
||||||
(->bool ((force proc) (if enabled? 1 0))))))
|
(->bool ((force proc) (if enabled? 1 0))))))
|
||||||
|
|
||||||
(define-syntax-rule (without-automatic-finalization exp)
|
(define-syntax-rule (without-automatic-finalization body ...)
|
||||||
"Turn off automatic finalization within the dynamic extent of EXP."
|
"Turn off automatic finalization within the dynamic extent of BODY. This is
|
||||||
|
useful to ensure there is no finalization thread."
|
||||||
(let ((enabled? #t))
|
(let ((enabled? #t))
|
||||||
(dynamic-wind
|
(dynamic-wind
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(set! enabled? (%set-automatic-finalization-enabled?! #f)))
|
(set! enabled? (%set-automatic-finalization-enabled?! #f)))
|
||||||
(lambda ()
|
(lambda ()
|
||||||
exp)
|
body ...)
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(%set-automatic-finalization-enabled?! enabled?)))))
|
(%set-automatic-finalization-enabled?! enabled?)))))
|
||||||
|
|
||||||
|
(define-syntax-rule (without-garbage-collection body ...)
|
||||||
|
"Turn off garbage collection within the dynamic extent of BODY. This is useful
|
||||||
|
to avoid the creation new garbage collection thread. Note that pre-existing
|
||||||
|
GC marker threads are only disabled, not terminated."
|
||||||
|
(dynamic-wind
|
||||||
|
(lambda ()
|
||||||
|
(gc-disable))
|
||||||
|
(lambda ()
|
||||||
|
body ...)
|
||||||
|
(lambda ()
|
||||||
|
(gc-enable))))
|
||||||
|
|
||||||
|
(define-syntax-rule (without-threads body ...)
|
||||||
|
"Ensure the Guile finalizer thread is stopped and that garbage collection does
|
||||||
|
not run. Note that pre-existing GC marker threads are only disabled, not
|
||||||
|
terminated. This also leaves the signal handling thread to be disabled by
|
||||||
|
another means, since there is no Guile API to do so."
|
||||||
|
;; Note: the three kind of threads that Guile can spawn are the finalization
|
||||||
|
;; thread, the signal thread, or the GC marker threads.
|
||||||
|
(without-automatic-finalization
|
||||||
|
(without-garbage-collection body ...)))
|
||||||
|
|
||||||
|
(define (ensure-signal-delivery-thread)
|
||||||
|
"Ensure the signal delivery thread is spawned and its state set
|
||||||
|
to 'RUNNING'. This is valid as of the implementation as of Guile 3.0.9."
|
||||||
|
(sigaction SIGUSR1)) ;could be any signal
|
||||||
|
|
||||||
;; The libc interface to sys_clone is not useful for Scheme programs, so the
|
;; The libc interface to sys_clone is not useful for Scheme programs, so the
|
||||||
;; low-level system call is wrapped instead. The 'syscall' function is
|
;; low-level system call is wrapped instead. The 'syscall' function is
|
||||||
;; declared in <unistd.h> as a variadic function; in practice, it expects 6
|
;; declared in <unistd.h> as a variadic function; in practice, it expects 6
|
||||||
@@ -1223,6 +1252,24 @@ are shared between the parent and child processes."
|
|||||||
(list err))
|
(list err))
|
||||||
ret)))))
|
ret)))))
|
||||||
|
|
||||||
|
(define (safe-clone flags child parent)
|
||||||
|
"This is a raw clone syscall wrapper that ensures no Guile thread will be
|
||||||
|
spawned during execution of the child. `clone' is called with FLAGS. CHILD
|
||||||
|
is a thunk to run in the child process. PARENT is procedure that accepts the
|
||||||
|
child PID as argument. This is useful in many contexts, such as when calling
|
||||||
|
`unshare' or async-unsafe procedures in the child when the parent process
|
||||||
|
memory (CLONE_VM) or threads (CLONE_THREAD) are shared with it."
|
||||||
|
;; TODO: Contribute `clone' to Guile, and handle these complications there,
|
||||||
|
;; similarly to how it's handled for scm_fork in posix.c.
|
||||||
|
|
||||||
|
;; XXX: This is a hack: as of Guile 3.0.9, by starting the signal delivery
|
||||||
|
;; thread in the parent, its state will be known as RUNNING, and the child
|
||||||
|
;; won't attempt to start it itself.
|
||||||
|
(ensure-signal-delivery-thread)
|
||||||
|
(match (clone flags)
|
||||||
|
(0 (without-threads (child)))
|
||||||
|
(pid (parent pid))))
|
||||||
|
|
||||||
(define (thread-count)
|
(define (thread-count)
|
||||||
"Return the complete thread count of the current process. Unlike
|
"Return the complete thread count of the current process. Unlike
|
||||||
`all-threads', this also counts the Guile signal delivery, and finalizer
|
`all-threads', this also counts the Guile signal delivery, and finalizer
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
;;; Copyright © 2015 David Thompson <davet@gnu.org>
|
;;; Copyright © 2015 David Thompson <davet@gnu.org>
|
||||||
;;; Copyright © 2020 Simon South <simon@simonsouth.net>
|
;;; Copyright © 2020 Simon South <simon@simonsouth.net>
|
||||||
;;; Copyright © 2020 Mathieu Othacehe <m.othacehe@gmail.com>
|
;;; Copyright © 2020 Mathieu Othacehe <m.othacehe@gmail.com>
|
||||||
|
;;; Copyright © 2025 Maxim Cournoyer <maxim@guixotic.coop>
|
||||||
;;;
|
;;;
|
||||||
;;; This file is part of GNU Guix.
|
;;; This file is part of GNU Guix.
|
||||||
;;;
|
;;;
|
||||||
@@ -29,7 +30,8 @@
|
|||||||
#:use-module (srfi srfi-71)
|
#:use-module (srfi srfi-71)
|
||||||
#:use-module (system foreign)
|
#:use-module (system foreign)
|
||||||
#:use-module ((ice-9 ftw) #:select (scandir))
|
#:use-module ((ice-9 ftw) #:select (scandir))
|
||||||
#:use-module (ice-9 match))
|
#:use-module (ice-9 match)
|
||||||
|
#:use-module (ice-9 threads))
|
||||||
|
|
||||||
;; Test the (guix build syscalls) module, although there's not much that can
|
;; Test the (guix build syscalls) module, although there's not much that can
|
||||||
;; actually be tested without being root.
|
;; actually be tested without being root.
|
||||||
@@ -158,6 +160,38 @@
|
|||||||
(lambda args
|
(lambda args
|
||||||
(system-error-errno args))))
|
(system-error-errno args))))
|
||||||
|
|
||||||
|
(define child-thunk
|
||||||
|
(lambda ()
|
||||||
|
(gc) ;spawn GC threads
|
||||||
|
(primitive-exit
|
||||||
|
(catch 'system-error
|
||||||
|
(lambda ()
|
||||||
|
(unshare CLONE_THREAD)
|
||||||
|
0) ;no error
|
||||||
|
(lambda args
|
||||||
|
(system-error-errno args))))))
|
||||||
|
|
||||||
|
(define parent-proc
|
||||||
|
(lambda (pid)
|
||||||
|
(match (waitpid pid)
|
||||||
|
((_ . status)
|
||||||
|
(status:exit-val status)))))
|
||||||
|
|
||||||
|
(unless perform-container-tests?
|
||||||
|
(test-skip 1))
|
||||||
|
(test-equal "clone and unshare triggers EINVAL"
|
||||||
|
EINVAL
|
||||||
|
(match (clone (logior CLONE_NEWUSER SIGCHLD))
|
||||||
|
(0 (child-thunk))
|
||||||
|
(pid (parent-proc pid))))
|
||||||
|
|
||||||
|
(unless perform-container-tests?
|
||||||
|
(test-skip 1))
|
||||||
|
(test-equal "safe-clone and unshare succeeds"
|
||||||
|
0
|
||||||
|
(safe-clone (logior CLONE_NEWUSER SIGCHLD)
|
||||||
|
child-thunk parent-proc))
|
||||||
|
|
||||||
(unless perform-container-tests?
|
(unless perform-container-tests?
|
||||||
(test-skip 1))
|
(test-skip 1))
|
||||||
(test-assert "setns"
|
(test-assert "setns"
|
||||||
|
|||||||
Reference in New Issue
Block a user