std: reorganize the UNIX-internal weak module

This commit is contained in:
joboet
2025-09-16 12:13:01 +02:00
parent f3fd3efe4f
commit e043a0b41e
8 changed files with 232 additions and 270 deletions
+1 -4
View File
@@ -2,10 +2,6 @@
use crate::io::ErrorKind;
#[cfg(not(target_os = "espidf"))]
#[macro_use]
pub mod weak;
#[cfg(target_os = "fuchsia")]
pub mod fuchsia;
pub mod futex;
@@ -19,6 +15,7 @@
pub mod sync;
pub mod thread_parking;
pub mod time;
pub mod weak;
#[cfg(target_os = "espidf")]
pub fn init(_argc: isize, _argv: *const *const u8, _sigpipe: u8) {}
+17 -28
View File
@@ -69,7 +69,6 @@ mod imp {
use super::Handler;
use super::thread_info::{delete_current_info, set_current_info, with_current_info};
use crate::ops::Range;
use crate::sync::OnceLock;
use crate::sync::atomic::{Atomic, AtomicBool, AtomicPtr, AtomicUsize, Ordering};
use crate::sys::pal::unix::os;
use crate::{io, mem, panic, ptr};
@@ -396,6 +395,10 @@ unsafe fn install_main_guard() -> Option<Range<usize>> {
} else if cfg!(all(target_os = "linux", target_env = "musl")) {
install_main_guard_linux_musl(page_size)
} else if cfg!(target_os = "freebsd") {
#[cfg(not(target_os = "freebsd"))]
return None;
// The FreeBSD code cannot be checked on non-BSDs.
#[cfg(target_os = "freebsd")]
install_main_guard_freebsd(page_size)
} else if cfg!(any(target_os = "netbsd", target_os = "openbsd")) {
install_main_guard_bsds(page_size)
@@ -432,6 +435,7 @@ unsafe fn install_main_guard_linux_musl(_page_size: usize) -> Option<Range<usize
}
#[forbid(unsafe_op_in_unsafe_fn)]
#[cfg(target_os = "freebsd")]
unsafe fn install_main_guard_freebsd(page_size: usize) -> Option<Range<usize>> {
// FreeBSD's stack autogrows, and optionally includes a guard page
// at the bottom. If we try to remap the bottom of the stack
@@ -443,38 +447,23 @@ unsafe fn install_main_guard_freebsd(page_size: usize) -> Option<Range<usize>> {
// by the security.bsd.stack_guard_page sysctl.
// By default it is 1, checking once is enough since it is
// a boot time config value.
static PAGES: OnceLock<usize> = OnceLock::new();
static PAGES: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();
let pages = PAGES.get_or_init(|| {
use crate::sys::weak::dlsym;
dlsym!(
fn sysctlbyname(
name: *const libc::c_char,
oldp: *mut libc::c_void,
oldlenp: *mut libc::size_t,
newp: *const libc::c_void,
newlen: libc::size_t,
) -> libc::c_int;
);
let mut guard: usize = 0;
let mut size = size_of_val(&guard);
let oid = c"security.bsd.stack_guard_page";
match sysctlbyname.get() {
Some(fcn)
if unsafe {
fcn(
oid.as_ptr(),
(&raw mut guard).cast(),
&raw mut size,
ptr::null_mut(),
0,
) == 0
} =>
{
guard
}
_ => 1,
}
let r = unsafe {
libc::sysctlbyname(
oid.as_ptr(),
(&raw mut guard).cast(),
&raw mut size,
ptr::null_mut(),
0,
)
};
if r == 0 { guard } else { 1 }
});
Some(guardaddr..guardaddr + pages * page_size)
}
-225
View File
@@ -1,225 +0,0 @@
//! Support for "weak linkage" to symbols on Unix
//!
//! Some I/O operations we do in std require newer versions of OSes but we need
//! to maintain binary compatibility with older releases for now. In order to
//! use the new functionality when available we use this module for detection.
//!
//! One option to use here is weak linkage, but that is unfortunately only
//! really workable with ELF. Otherwise, use dlsym to get the symbol value at
//! runtime. This is also done for compatibility with older versions of glibc,
//! and to avoid creating dependencies on GLIBC_PRIVATE symbols. It assumes that
//! we've been dynamically linked to the library the symbol comes from, but that
//! is currently always the case for things like libpthread/libc.
//!
//! A long time ago this used weak linkage for the __pthread_get_minstack
//! symbol, but that caused Debian to detect an unnecessarily strict versioned
//! dependency on libc6 (#23628) because it is GLIBC_PRIVATE. We now use `dlsym`
//! for a runtime lookup of that symbol to avoid the ELF versioned dependency.
// There are a variety of `#[cfg]`s controlling which targets are involved in
// each instance of `weak!` and `syscall!`. Rather than trying to unify all of
// that, we'll just allow that some unix targets don't use this module at all.
#![allow(dead_code, unused_macros)]
#![forbid(unsafe_op_in_unsafe_fn)]
use crate::ffi::{CStr, c_char, c_void};
use crate::marker::{FnPtr, PhantomData};
use crate::sync::atomic::{Atomic, AtomicPtr, Ordering};
use crate::{mem, ptr};
// We currently only test `dlsym!`, but that doesn't work on all platforms, so
// we gate the tests to only the platforms where it is actually used.
//
// FIXME(joboet): add more tests, reorganise the whole module and get rid of
// `#[allow(dead_code, unused_macros)]`.
#[cfg(any(
target_vendor = "apple",
all(target_os = "linux", target_env = "gnu"),
target_os = "freebsd",
))]
#[cfg(test)]
mod tests;
// We can use true weak linkage on ELF targets.
//
// `weak!` accepts one C function signature and expands, in statement
// position, to a local binding named after that function. The binding refers
// to an `ExternWeak` holding an `Option` of the function pointer; call
// `.get()` on it to obtain the pointer.
#[cfg(all(unix, not(target_vendor = "apple")))]
pub(crate) macro weak {
(fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
let ref $name: ExternWeak<unsafe extern "C" fn($($t),*) -> $ret> = {
unsafe extern "C" {
// The symbol is declared with `extern_weak` linkage and typed as an
// `Option` of the function pointer.
#[linkage = "extern_weak"]
static $name: Option<unsafe extern "C" fn($($t),*) -> $ret>;
}
// The static is read once here and its value is stored in the wrapper.
#[allow(unused_unsafe)]
ExternWeak::new(unsafe { $name })
};
)
}
// On non-ELF targets, use the dlsym approximation of weak linkage.
#[cfg(target_vendor = "apple")]
pub(crate) use self::dlsym as weak;
/// Holds the value that was read from a weakly linked symbol.
///
/// `F` is the function pointer type; the wrapped `Option` is handed back
/// unchanged by [`ExternWeak::get`].
pub(crate) struct ExternWeak<F: Copy> {
    weak_ptr: Option<F>,
}

impl<F: Copy> ExternWeak<F> {
    /// Wraps the given (possibly absent) function pointer.
    #[inline]
    pub(crate) fn new(weak_ptr: Option<F>) -> Self {
        Self { weak_ptr }
    }

    /// Returns a copy of the wrapped function pointer, or `None` if the
    /// wrapper was created without one.
    #[inline]
    pub(crate) fn get(&self) -> Option<F> {
        let Self { weak_ptr } = *self;
        weak_ptr
    }
}
/// Declares a symbol that is represented by a `DlsymWeak`.
///
/// Expands, in statement position, to a `static` `DlsymWeak` plus a local
/// binding named after the function that refers to it. The first form uses
/// the function's own name as the symbol name; the second form takes the
/// symbol name from `#[link_name = ...]`.
pub(crate) macro dlsym {
// Shorthand: forward to the `#[link_name]` form with the stringified name.
(fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
dlsym!(
#[link_name = stringify!($name)]
fn $name($($param : $t),*) -> $ret;
);
),
(
#[link_name = $sym:expr]
fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;
) => (
static DLSYM: DlsymWeak<unsafe extern "C" fn($($t),*) -> $ret> = {
// A NUL is appended to the symbol name and the result is validated as a
// C string inside the static's initializer.
let Ok(name) = CStr::from_bytes_with_nul(concat!($sym, '\0').as_bytes()) else {
panic!("symbol name may not contain NUL")
};
// SAFETY: Whoever calls the function pointer returned by `get()`
// is responsible for ensuring that the signature is correct. Just
// like with extern blocks, this is syntactically enforced by making
// the function pointer be unsafe.
unsafe { DlsymWeak::new(name) }
};
// Callers interact with the static through this local reference.
let $name = &DLSYM;
)
}
/// A symbol that is looked up on first use, with the result cached.
///
/// `F` is the function pointer type handed out by `get()`.
pub(crate) struct DlsymWeak<F> {
/// A pointer to the nul-terminated name of the symbol.
// Use a pointer instead of `&'static CStr` to save space.
name: *const c_char,
/// The cached lookup result. It starts out as the address 1, which `get()`
/// treats as "not looked up yet"; afterwards it holds whatever `dlsym`
/// returned, and a null value makes `get()` return `None`.
func: Atomic<*mut libc::c_void>,
/// Ties the otherwise unused type parameter `F` to the struct.
_marker: PhantomData<F>,
}
impl<F: FnPtr> DlsymWeak<F> {
/// Creates a handle for the symbol `name` without performing the lookup;
/// the cache is seeded with the sentinel address 1.
///
/// # Safety
///
/// If the signature of `F` does not match the signature of the symbol (if
/// it exists), calling the function pointer returned by `get()` is
/// undefined behaviour.
pub(crate) const unsafe fn new(name: &'static CStr) -> Self {
DlsymWeak {
name: name.as_ptr(),
func: AtomicPtr::new(ptr::without_provenance_mut(1)),
_marker: PhantomData,
}
}
/// Returns the function pointer for the symbol, or `None` if the cached
/// lookup result is null. If the cache still holds the sentinel address 1,
/// the lookup is performed first via `initialize`.
#[inline]
pub(crate) fn get(&self) -> Option<F> {
// The caller is presumably going to read through this value
// (by calling the function we've dlsymed). This means we'd
// need to have loaded it with at least C11's consume
// ordering in order to be guaranteed that the data we read
// from the pointer isn't from before the pointer was
// stored. Rust has no equivalent to memory_order_consume,
// so we use an acquire load (sorry, ARM).
//
// Now, in practice this likely isn't needed even on CPUs
// where relaxed and consume mean different things. The
// symbols we're loading are probably present (or not) at
// init, and even if they aren't the runtime dynamic loader
// is extremely likely to have sufficient barriers internally
// (possibly implicitly, for example the ones provided by
// invoking `mprotect`).
//
// That said, none of that's *guaranteed*, so we use acquire.
match self.func.load(Ordering::Acquire) {
func if func.addr() == 1 => self.initialize(),
func if func.is_null() => None,
// SAFETY:
// `func` is not null and `F` implements `FnPtr`, thus this
// transmutation is well-defined. It is the responsibility of the
// creator of this `DlsymWeak` to ensure that calling the resulting
// function pointer does not result in undefined behaviour (though
// the `dlsym!` macro delegates this responsibility to the caller
// of the function by using `unsafe` function pointers).
// FIXME: use `transmute` once it stops complaining about generics.
func => Some(unsafe { mem::transmute_copy::<*mut c_void, F>(&func) }),
}
}
/// Looks the symbol up with `libc::dlsym(libc::RTLD_DEFAULT, ..)`, stores
/// the result (null included) in the cache and returns it.
// Cold because it should only happen during first-time initialization.
#[cold]
fn initialize(&self) -> Option<F> {
// SAFETY: `self.name` was created from a `&'static CStr` and is
// therefore a valid C string pointer.
let val = unsafe { libc::dlsym(libc::RTLD_DEFAULT, self.name) };
// This synchronizes with the acquire load in `get`.
self.func.store(val, Ordering::Release);
if val.is_null() {
None
} else {
// SAFETY: see the comment in `get`.
// FIXME: use `transmute` once it stops complaining about generics.
Some(unsafe { mem::transmute_copy::<*mut libc::c_void, F>(&val) })
}
}
}
// SAFETY: presumably sound because `name` is derived from a `&'static CStr`
// in `new` and `func` is only accessed through atomic operations.
// NOTE(review): this also relies on `dlsym` being callable from any thread —
// confirm.
unsafe impl<F> Send for DlsymWeak<F> {}
unsafe impl<F> Sync for DlsymWeak<F> {}
/// Defines an `unsafe fn` with the given signature that forwards to the
/// symbol of the same name found through `weak!`.
///
/// If the symbol is not available, the generated function sets errno to
/// `ENOSYS` and returns `-1`.
#[cfg(not(any(target_os = "linux", target_os = "android")))]
pub(crate) macro syscall {
(fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
unsafe fn $name($($param: $t),*) -> $ret {
weak!(fn $name($($param: $t),*) -> $ret;);
if let Some(fun) = $name.get() {
unsafe { fun($($param),*) }
} else {
// No such symbol: report the call as unimplemented.
super::os::set_errno(libc::ENOSYS);
-1
}
}
)
}
/// Defines an `unsafe fn` with the given signature that forwards to the
/// symbol of the same name found through `weak!`.
///
/// If the symbol is not available, the generated function issues
/// `libc::syscall` with the `SYS_<name>` number instead and casts the result
/// to the declared return type.
#[cfg(any(target_os = "linux", target_os = "android"))]
pub(crate) macro syscall {
(
fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;
) => (
unsafe fn $name($($param: $t),*) -> $ret {
weak!(fn $name($($param: $t),*) -> $ret;);
// Use a weak symbol from libc when possible, allowing `LD_PRELOAD`
// interposition, but if it's not found just use a raw syscall.
if let Some(fun) = $name.get() {
unsafe { fun($($param),*) }
} else {
unsafe { libc::syscall(libc::${concat(SYS_, $name)}, $($param),*) as $ret }
}
}
)
}
/// Defines an `unsafe fn` with the given signature that always issues
/// `libc::syscall` with the `SYS_<name>` number, without trying a libc symbol
/// first. The result is cast to the declared return type.
#[cfg(any(target_os = "linux", target_os = "android"))]
pub(crate) macro raw_syscall {
(fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
unsafe fn $name($($param: $t),*) -> $ret {
unsafe { libc::syscall(libc::${concat(SYS_, $name)}, $($param),*) as $ret }
}
)
}
+104
View File
@@ -0,0 +1,104 @@
use crate::ffi::{CStr, c_char, c_void};
use crate::marker::{FnPtr, PhantomData};
use crate::sync::atomic::{Atomic, AtomicPtr, Ordering};
use crate::{mem, ptr};
#[cfg(test)]
#[path = "./tests.rs"]
mod tests;
/// Declares a symbol that is represented by a `DlsymWeak`.
///
/// Accepts one C function signature and expands, in statement position, to a
/// `static` `DlsymWeak` plus a local binding named after the function that
/// refers to it. The symbol name is the function's name.
pub(crate) macro weak {
(fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
static DLSYM: DlsymWeak<unsafe extern "C" fn($($t),*) -> $ret> = {
// A NUL is appended to the stringified name and the result is validated
// as a C string inside the static's initializer.
let Ok(name) = CStr::from_bytes_with_nul(concat!(stringify!($name), '\0').as_bytes()) else {
panic!("symbol name may not contain NUL")
};
// SAFETY: Whoever calls the function pointer returned by `get()`
// is responsible for ensuring that the signature is correct. Just
// like with extern blocks, this is syntactically enforced by making
// the function pointer be unsafe.
unsafe { DlsymWeak::new(name) }
};
// Callers interact with the static through this local reference.
let $name = &DLSYM;
)
}
/// A symbol that is looked up on first use, with the result cached.
///
/// `F` is the function pointer type handed out by `get()`.
pub(crate) struct DlsymWeak<F> {
/// A pointer to the nul-terminated name of the symbol.
// Use a pointer instead of `&'static CStr` to save space.
name: *const c_char,
/// The cached lookup result. It starts out as the address 1, which `get()`
/// treats as "not looked up yet"; afterwards it holds whatever `dlsym`
/// returned, and a null value makes `get()` return `None`.
func: Atomic<*mut libc::c_void>,
/// Ties the otherwise unused type parameter `F` to the struct.
_marker: PhantomData<F>,
}
impl<F: FnPtr> DlsymWeak<F> {
    /// Address kept in `func` until the first lookup has been performed.
    const UNRESOLVED: usize = 1;

    /// Creates a handle for the symbol called `name` without looking it up.
    ///
    /// # Safety
    ///
    /// Should the symbol exist with a signature that differs from `F`,
    /// calling the function pointer obtained from `get()` is undefined
    /// behaviour.
    pub const unsafe fn new(name: &'static CStr) -> Self {
        let func = AtomicPtr::new(ptr::without_provenance_mut(Self::UNRESOLVED));
        Self { name: name.as_ptr(), func, _marker: PhantomData }
    }

    /// Returns the function pointer for the symbol, running the lookup first
    /// if it has not happened yet. Yields `None` when the cached result is
    /// null.
    #[inline]
    pub fn get(&self) -> Option<F> {
        // The returned pointer is presumably going to be called, i.e. read
        // through. To be guaranteed that such reads do not observe data from
        // before the pointer was stored, C11's consume ordering would be the
        // minimum; Rust does not offer it, so an acquire load is used
        // instead (sorry, ARM).
        //
        // In practice this is likely unnecessary even where relaxed and
        // consume differ: the symbols are probably present (or not) at init,
        // and otherwise the dynamic loader is extremely likely to have
        // sufficient barriers internally (possibly implicit ones, such as
        // those provided by invoking `mprotect`).
        //
        // None of that is *guaranteed*, though, hence acquire.
        let cached = self.func.load(Ordering::Acquire);
        if cached.addr() == Self::UNRESOLVED {
            self.initialize()
        } else {
            Self::into_fn(cached)
        }
    }

    /// Performs the `dlsym` lookup, caches its result and returns it.
    // Cold: expected to run only for first-time initialization.
    #[cold]
    fn initialize(&self) -> Option<F> {
        // SAFETY: `self.name` originates from a `&'static CStr`, so it is a
        // valid C string pointer.
        let resolved = unsafe { libc::dlsym(libc::RTLD_DEFAULT, self.name) };
        // Pairs with the acquire load in `get`.
        self.func.store(resolved, Ordering::Release);
        Self::into_fn(resolved)
    }

    /// Converts a looked-up address into the function pointer type, mapping
    /// null to `None`.
    #[inline]
    fn into_fn(raw: *mut c_void) -> Option<F> {
        if raw.is_null() {
            return None;
        }
        // SAFETY:
        // `raw` is non-null and `F` implements `FnPtr`, so the transmutation
        // is well-defined. Whoever created this `DlsymWeak` must make sure
        // that calling the resulting pointer is not undefined behaviour (the
        // `weak!` macro passes that duty on to the caller by only handing
        // out `unsafe` function pointers).
        // FIXME: use `transmute` once it stops complaining about generics.
        Some(unsafe { mem::transmute_copy::<*mut c_void, F>(&raw) })
    }
}
// SAFETY: presumably sound because `name` is derived from a `&'static CStr`
// in `new` and `func` is only accessed through atomic operations.
// NOTE(review): this also relies on `dlsym` being callable from any thread —
// confirm.
unsafe impl<F> Send for DlsymWeak<F> {}
unsafe impl<F> Sync for DlsymWeak<F> {}
+52
View File
@@ -0,0 +1,52 @@
//! Support for "weak linkage" to symbols on Unix
//!
//! Some I/O operations we do in std require newer versions of OSes but we need
//! to maintain binary compatibility with older releases for now. In order to
//! use the new functionality when available we use this module for detection.
//!
//! One option to use here is weak linkage, but that is unfortunately only
//! really workable with ELF. Otherwise, use dlsym to get the symbol value at
//! runtime. This is also done for compatibility with older versions of glibc,
//! and to avoid creating dependencies on GLIBC_PRIVATE symbols. It assumes that
//! we've been dynamically linked to the library the symbol comes from, but that
//! is currently always the case for things like libpthread/libc.
//!
//! A long time ago this used weak linkage for the __pthread_get_minstack
//! symbol, but that caused Debian to detect an unnecessarily strict versioned
//! dependency on libc6 (#23628) because it is GLIBC_PRIVATE. We now use `dlsym`
//! for a runtime lookup of that symbol to avoid the ELF versioned dependency.
#![forbid(unsafe_op_in_unsafe_fn)]
// Pick the implementation that backs the `weak!` macro on the current target.
cfg_select! {
// On non-ELF targets, use the dlsym approximation of weak linkage.
target_vendor = "apple" => {
mod dlsym;
pub(crate) use dlsym::weak;
}
// Some targets neither need nor support weak linkage at all...
target_os = "espidf" => {}
// ... but ELF targets support true weak linkage.
_ => {
// There are a variety of `#[cfg]`s controlling which targets are involved in
// each instance of `weak!`. Rather than trying to unify all of
// that, we'll just allow that some unix targets don't use this macro at all.
#[cfg_attr(not(target_os = "linux"), allow(unused_macros, dead_code))]
mod weak_linkage;
#[cfg_attr(not(target_os = "linux"), allow(unused_imports))]
pub(crate) use weak_linkage::weak;
}
}
// GNU/Linux needs the `dlsym` variant to avoid linking to private glibc symbols.
// There, the `weak!` macro of the `dlsym` module is re-exported as `dlsym!`.
#[cfg(all(target_os = "linux", target_env = "gnu"))]
mod dlsym;
#[cfg(all(target_os = "linux", target_env = "gnu"))]
pub(crate) use dlsym::weak as dlsym;
// The `syscall!` macro is only provided on Linux and Android.
#[cfg(any(target_os = "android", target_os = "linux"))]
mod syscall;
#[cfg(any(target_os = "android", target_os = "linux"))]
pub(crate) use syscall::syscall;
@@ -0,0 +1,19 @@
use super::weak;
/// Defines an `unsafe fn` with the given signature that forwards to the
/// symbol of the same name found through `weak!`.
///
/// If the symbol is not available, the generated function issues
/// `libc::syscall` with the `SYS_<name>` number instead and casts the result
/// to the declared return type.
pub(crate) macro syscall {
(
fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;
) => (
unsafe fn $name($($param: $t),*) -> $ret {
weak!(fn $name($($param: $t),*) -> $ret;);
// Use a weak symbol from libc when possible, allowing `LD_PRELOAD`
// interposition, but if it's not found just use a raw syscall.
if let Some(fun) = $name.get() {
unsafe { fun($($param),*) }
} else {
unsafe { libc::syscall(libc::${concat(SYS_, $name)}, $($param),*) as $ret }
}
}
)
}
+7 -13
View File
@@ -1,30 +1,24 @@
use super::*;
// This file is included by both implementations of `weak!`.
use super::weak;
use crate::ffi::{CStr, c_char};
#[test]
fn dlsym_existing() {
fn weak_existing() {
const TEST_STRING: &'static CStr = c"Ferris!";
// Try to find a symbol that definitely exists.
dlsym! {
weak! {
fn strlen(cs: *const c_char) -> usize;
}
dlsym! {
#[link_name = "strlen"]
fn custom_name(cs: *const c_char) -> usize;
}
let strlen = strlen.get().unwrap();
assert_eq!(unsafe { strlen(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes());
let custom_name = custom_name.get().unwrap();
assert_eq!(unsafe { custom_name(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes());
}
#[test]
fn dlsym_missing() {
fn weak_missing() {
// Try to find a symbol that definitely does not exist.
dlsym! {
weak! {
fn test_symbol_that_does_not_exist() -> i32;
}
@@ -0,0 +1,32 @@
#[cfg(test)]
#[path = "./tests.rs"]
mod tests;
/// Declares a weakly linked symbol.
///
/// Accepts one C function signature and expands, in statement position, to a
/// local binding named after that function. The binding refers to an
/// `ExternWeak` holding an `Option` of the function pointer; call `.get()` on
/// it to obtain the pointer.
pub(crate) macro weak {
(fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
let ref $name: ExternWeak<unsafe extern "C" fn($($t),*) -> $ret> = {
unsafe extern "C" {
// The symbol is declared with `extern_weak` linkage and typed as an
// `Option` of the function pointer.
#[linkage = "extern_weak"]
static $name: Option<unsafe extern "C" fn($($t),*) -> $ret>;
}
// The static is read once here and its value is stored in the wrapper.
#[allow(unused_unsafe)]
ExternWeak::new(unsafe { $name })
};
)
}
/// Holds the value that was read from a weakly linked symbol.
///
/// `F` is the function pointer type; the wrapped `Option` is handed back
/// unchanged by [`ExternWeak::get`].
pub(crate) struct ExternWeak<F: Copy> {
    weak_ptr: Option<F>,
}

impl<F: Copy> ExternWeak<F> {
    /// Wraps the given (possibly absent) function pointer.
    #[inline]
    pub fn new(weak_ptr: Option<F>) -> Self {
        Self { weak_ptr }
    }

    /// Returns a copy of the wrapped function pointer, or `None` if the
    /// wrapper was created without one.
    #[inline]
    pub fn get(&self) -> Option<F> {
        self.weak_ptr.as_ref().copied()
    }
}