Rollup merge of #154865 - sunshowers:binary-search-test, r=Mark-Simulacrum

libtest: use binary search for --exact test filtering When `--exact` is passed in, use binary search for O(f log n) lookups instead of an O(n) linear scan, under the assumption that f << n (which is true for the most relevant cases). This is important for Miri, where the interpreted execution makes the linear scan very expensive. I measured this against a repo with 1000 empty tests, running `cargo +stage1 miri nextest run test_00` (100 tests) under hyperfine: * Before (linear scan): 49.7s ± 0.6s * After (binary search): 41.9s ± 0.2s (-15.7%) I also tried a few other variations (particularly swapping matching tests to the front of the list + truncating the list), but the index + swap_remove approach proved to be the fastest. Questions: - [ ] To be conservative, I've assumed that test_main can potentially receive an unsorted list of tests. Is this assumption correct?
2026-04-26 13:01:27 +03:00 · 2026-04-19 16:04:32 +02:00
parent b07688dc2e bbb9e3bb4a
commit 7b658037fd
5 changed files with 121 additions and 28 deletions
@@ -370,6 +370,8 @@ fn mk_tests_slice(cx: &TestCtxt<'_>, sp: Span) -> Box<ast::Expr> {
    let ecx = &cx.ext_cx;

    let mut tests = cx.test_cases.clone();
+    // Note that this sort is load-bearing: the libtest harness uses binary search to find tests by
+    // name.
    tests.sort_by(|a, b| a.name.as_str().cmp(b.name.as_str()));

    ecx.expr_array_ref(
@@ -16,7 +16,7 @@
 use super::options::{Options, OutputFormat};
 use super::test_result::TestResult;
 use super::time::{TestExecTime, TestSuiteExecTime};
-use super::types::{NamePadding, TestDesc, TestDescAndFn};
+use super::types::{NamePadding, TestDesc, TestDescAndFn, TestList};
 use super::{filter_tests, run_tests, term};

 /// Generic wrapper over stdout.
@@ -170,7 +170,7 @@ fn current_test_count(&self) -> usize {
 }

 // List the tests to console, and optionally to logfile. Filters are honored.
-pub(crate) fn list_tests_console(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> io::Result<()> {
+pub(crate) fn list_tests_console(opts: &TestOpts, tests: TestList) -> io::Result<()> {
    let output = match term::stdout() {
        None => OutputLocation::Raw(io::stdout().lock()),
        Some(t) => OutputLocation::Pretty(t),
@@ -186,7 +186,7 @@ pub(crate) fn list_tests_console(opts: &TestOpts, tests: Vec<TestDescAndFn>) ->
    let mut st = ConsoleTestDiscoveryState::new(opts)?;

    out.write_discovery_start()?;
-    for test in filter_tests(opts, tests).into_iter() {
+    for test in filter_tests(opts, tests) {
        use crate::TestFn::*;

        let TestDescAndFn { desc, testfn } = test;
@@ -307,8 +307,9 @@ pub(crate) fn get_formatter(opts: &TestOpts, max_name_len: usize) -> Box<dyn Out

 /// A simple console test runner.
 /// Runs provided tests reporting process and results to the stdout.
-pub fn run_tests_console(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> io::Result<bool> {
+pub fn run_tests_console(opts: &TestOpts, tests: TestList) -> io::Result<bool> {
    let max_name_len = tests
+        .tests
        .iter()
        .max_by_key(|t| len_if_padded(t))
        .map(|t| t.desc.name.as_slice().len())
@@ -49,7 +49,7 @@ pub mod test {
    pub use crate::time::{TestExecTime, TestTimeOptions};
    pub use crate::types::{
        DynTestFn, DynTestName, StaticBenchFn, StaticTestFn, StaticTestName, TestDesc,
-        TestDescAndFn, TestId, TestName, TestType,
+        TestDescAndFn, TestId, TestList, TestListOrder, TestName, TestType,
    };
    pub use crate::{assert_test_result, filter_tests, run_test, test_main, test_main_static};
 }
@@ -106,6 +106,16 @@ pub fn test_main_with_exit_callback<F: FnOnce()>(
    tests: Vec<TestDescAndFn>,
    options: Option<Options>,
    exit_callback: F,
+) {
+    let tests = TestList::new(tests, TestListOrder::Unsorted);
+    test_main_inner(args, tests, options, exit_callback)
+}
+
+fn test_main_inner<F: FnOnce()>(
+    args: &[String],
+    tests: TestList,
+    options: Option<Options>,
+    exit_callback: F,
 ) {
    let mut opts = match cli::parse_opts(args) {
        Some(Ok(o)) => o,
@@ -180,7 +190,9 @@ pub fn test_main_with_exit_callback<F: FnOnce()>(
 pub fn test_main_static(tests: &[&TestDescAndFn]) {
    let args = env::args().collect::<Vec<_>>();
    let owned_tests: Vec<_> = tests.iter().map(make_owned_test).collect();
-    test_main(&args, owned_tests, None)
+    // Tests are sorted by name at compile time by mk_tests_slice.
+    let tests = TestList::new(owned_tests, TestListOrder::Sorted);
+    test_main_inner(&args, tests, None, || {})
 }

 /// A variant optimized for invocation with a static test vector.
@@ -229,7 +241,9 @@ pub fn test_main_static_abort(tests: &[&TestDescAndFn]) {

    let args = env::args().collect::<Vec<_>>();
    let owned_tests: Vec<_> = tests.iter().map(make_owned_test).collect();
-    test_main(&args, owned_tests, Some(Options::new().panic_abort(true)))
+    // Tests are sorted by name at compile time by mk_tests_slice.
+    let tests = TestList::new(owned_tests, TestListOrder::Sorted);
+    test_main_inner(&args, tests, Some(Options::new().panic_abort(true)), || {})
 }

 /// Clones static values for putting into a dynamic vector, which test_main()
@@ -298,7 +312,7 @@ fn total_len(&self) -> usize {

 pub fn run_tests<F>(
    opts: &TestOpts,
-    tests: Vec<TestDescAndFn>,
+    tests: TestList,
    mut notify_about_test_event: F,
 ) -> io::Result<()>
 where
@@ -334,7 +348,7 @@ struct TimeoutEntry {
        timeout: Instant,
    }

-    let tests_len = tests.len();
+    let tests_len = tests.tests.len();

    let mut filtered = FilteredTests { tests: Vec::new(), benches: Vec::new(), next_id: 0 };

@@ -512,25 +526,48 @@ fn calc_timeout(timeout_queue: &VecDeque<TimeoutEntry>) -> Option<Duration> {
    Ok(())
 }

-pub fn filter_tests(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> Vec<TestDescAndFn> {
+pub fn filter_tests(opts: &TestOpts, tests: TestList) -> Vec<TestDescAndFn> {
+    let TestList { tests, order } = tests;
    let mut filtered = tests;
-    let matches_filter = |test: &TestDescAndFn, filter: &str| {
-        let test_name = test.desc.name.as_slice();

-        match opts.filter_exact {
-            true => test_name == filter,
-            false => test_name.contains(filter),
-        }
-    };
-
-    // Remove tests that don't match the test filter
+    // Remove tests that don't match the test filter.
    if !opts.filters.is_empty() {
-        filtered.retain(|test| opts.filters.iter().any(|filter| matches_filter(test, filter)));
+        if opts.filter_exact && order == TestListOrder::Sorted {
+            // Let's say that `f` is the number of filters and `n` is the number
+            // of tests.
+            //
+            // The test array is sorted by name (guaranteed by the caller via
+            // TestListOrder::Sorted), so use binary search for O(f log n)
+            // exact-match lookups instead of an O(n) linear scan.
+            //
+            // This is important for Miri, where the interpreted execution makes
+            // the linear scan very expensive.
+            filtered = filter_exact_match(filtered, &opts.filters);
+        } else {
+            filtered.retain(|test| {
+                let test_name = test.desc.name.as_slice();
+                opts.filters.iter().any(|filter| {
+                    if opts.filter_exact {
+                        test_name == filter.as_str()
+                    } else {
+                        test_name.contains(filter.as_str())
+                    }
+                })
+            });
+        }
    }

    // Skip tests that match any of the skip filters
+    //
+    // After exact positive filtering above, the filtered set is small, so a
+    // linear scan is acceptable even under Miri.
    if !opts.skip.is_empty() {
-        filtered.retain(|test| !opts.skip.iter().any(|sf| matches_filter(test, sf)));
+        filtered.retain(|test| {
+            let name = test.desc.name.as_slice();
+            !opts.skip.iter().any(|sf| {
+                if opts.filter_exact { name == sf.as_str() } else { name.contains(sf.as_str()) }
+            })
+        });
    }

    // Excludes #[should_panic] tests
@@ -553,6 +590,30 @@ pub fn filter_tests(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> Vec<TestDescA
    filtered
 }

+/// Extract tests whose names exactly match one of the given `filters`, using
+/// binary search on the (assumed sorted) test list.
+fn filter_exact_match(mut tests: Vec<TestDescAndFn>, filters: &[String]) -> Vec<TestDescAndFn> {
+    // Binary search for each filter in the sorted test list.
+    let mut indexes: Vec<usize> = filters
+        .iter()
+        .filter_map(|f| tests.binary_search_by(|t| t.desc.name.as_slice().cmp(f.as_str())).ok())
+        .collect();
+    indexes.sort_unstable();
+    indexes.dedup();
+
+    // Extract matching tests. Process indexes in descending order so that
+    // swap_remove (which replaces the removed element with the last) does not
+    // invalidate indexes we haven't visited yet.
+    let mut result = Vec::with_capacity(indexes.len());
+    for &idx in indexes.iter().rev() {
+        result.push(tests.swap_remove(idx));
+    }
+    // Reverse to restore the original sorted order, since we extracted the
+    // matching tests in descending index order.
+    result.reverse();
+    result
+}
+
 pub fn convert_benchmarks_to_tests(tests: Vec<TestDescAndFn>) -> Vec<TestDescAndFn> {
    // convert benchmarks to tests, if we're not benchmarking them
    tests
@@ -477,7 +477,7 @@ fn filter_for_ignored_option() {
    opts.run_tests = true;
    opts.run_ignored = RunIgnored::Only;

-    let tests = one_ignored_one_unignored_test();
+    let tests = TestList::new(one_ignored_one_unignored_test(), TestListOrder::Unsorted);
    let filtered = filter_tests(&opts, tests);

    assert_eq!(filtered.len(), 1);
@@ -494,7 +494,7 @@ fn run_include_ignored_option() {
    opts.run_tests = true;
    opts.run_ignored = RunIgnored::Yes;

-    let tests = one_ignored_one_unignored_test();
+    let tests = TestList::new(one_ignored_one_unignored_test(), TestListOrder::Unsorted);
    let filtered = filter_tests(&opts, tests);

    assert_eq!(filtered.len(), 2);
@@ -527,7 +527,7 @@ fn exclude_should_panic_option() {
        testfn: DynTestFn(Box::new(move || Ok(()))),
    });

-    let filtered = filter_tests(&opts, tests);
+    let filtered = filter_tests(&opts, TestList::new(tests, TestListOrder::Unsorted));

    assert_eq!(filtered.len(), 2);
    assert!(filtered.iter().all(|test| test.desc.should_panic == ShouldPanic::No));
@@ -535,8 +535,8 @@ fn exclude_should_panic_option() {

 #[test]
 fn exact_filter_match() {
-    fn tests() -> Vec<TestDescAndFn> {
-        ["base", "base::test", "base::test1", "base::test2"]
+    fn tests() -> TestList {
+        let tests = ["base", "base::test", "base::test1", "base::test2"]
            .into_iter()
            .map(|name| TestDescAndFn {
                desc: TestDesc {
@@ -555,7 +555,8 @@ fn tests() -> Vec<TestDescAndFn> {
                },
                testfn: DynTestFn(Box::new(move || Ok(()))),
            })
-            .collect()
+            .collect();
+        TestList::new(tests, TestListOrder::Sorted)
    }

    let substr =
@@ -908,7 +909,8 @@ fn f(_: &mut Bencher) -> Result<(), String> {
        }
        Ok(())
    };
-    run_tests(&TestOpts { run_tests: true, ..TestOpts::new() }, vec![desc], notify).unwrap();
+    let tests = TestList::new(vec![desc], TestListOrder::Unsorted);
+    run_tests(&TestOpts { run_tests: true, ..TestOpts::new() }, tests, notify).unwrap();
    let result = rx.recv().unwrap().result;
    assert_eq!(result, TrFailed);
 }
@@ -284,3 +284,30 @@ pub const fn new_doctest(
        }
    }
 }
+
+/// Whether a [`TestList`]'s tests are known to be sorted by name.
+///
+/// When tests are sorted, `filter_tests` can use binary search for `--exact`
+/// matches instead of a linear scan.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum TestListOrder {
+    /// Tests are sorted by name. This is guaranteed for tests generated by
+    /// `rustc --test` (see `mk_tests_slice` in
+    /// `compiler/rustc_builtin_macros/src/test_harness.rs`).
+    Sorted,
+    /// Test order is unknown; binary search must not be used.
+    Unsorted,
+}
+
+/// A list of tests, tagged with whether they are sorted by name.
+#[derive(Debug)]
+pub struct TestList {
+    pub tests: Vec<TestDescAndFn>,
+    pub order: TestListOrder,
+}
+
+impl TestList {
+    pub fn new(tests: Vec<TestDescAndFn>, order: TestListOrder) -> Self {
+        Self { tests, order }
+    }
+}