Skip empty archetypes and tables when iterating over queries (#4724)

# Objective Speed up queries that are fragmented over many empty archetypes and tables. ## Solution Add an early-out to check if the table or archetype is empty before iterating over it. This adds an extra branch for every archetype matched, but skips setting the archetype/table on the underlying fetch state and any iteration over it. This may not be worth it for the default `Query::iter` and maybe even the `Query::for_each` implementations, but this definitely avoids scheduling unnecessary tasks in the `Query::par_for_each` case. Ideally, `matched_archetypes` should only contain archetypes where there's actually work to do, but this would add an `O(n)` flat cost to every call to `update_archetypes` that scales with the number of matched archetypes. TODO: Benchmark
2024-11-10 07:04:33 +00:00 · 2022-10-24 13:22:05 +00:00 · 2022-10-24 13:22:05 +00:00 · b508b5c7c7
commit b508b5c7c7
parent 7989cb2650
2 changed files with 261 additions and 0 deletions
--- a/benches/benches/bevy_ecs/empty_archetypes.rs
+++ b/benches/benches/bevy_ecs/empty_archetypes.rs
@ -0,0 +1,253 @@
+use bevy_ecs::{
+    component::Component,
+    prelude::*,
+    schedule::{Stage, SystemStage},
+    world::World,
+};
+use bevy_tasks::{ComputeTaskPool, TaskPool};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+
+// Register `empty_archetypes` as the only benchmark function of the `benches` group and
+// generate the benchmark binary's entry point from that group.
+criterion_group!(benches, empty_archetypes);
+criterion_main!(benches);
+
+/// Component carrying a single `f32`, parameterised by a const index `N`.
+///
+/// Each value of `N` names a different type, which lets this file create many distinct
+/// component types (`A<0>`, `A<1>`, ...) from one declaration.
+#[derive(Component)]
+struct A<const N: u16>(f32);
+
+/// Benchmark system: walks the query for components `A<0>` through `A<12>` with
+/// `Query::iter` and hands every item to `black_box`.
+fn iter(
+    query: Query<(
+        &A<0>,
+        &A<1>,
+        &A<2>,
+        &A<3>,
+        &A<4>,
+        &A<5>,
+        &A<6>,
+        &A<7>,
+        &A<8>,
+        &A<9>,
+        &A<10>,
+        &A<11>,
+        &A<12>,
+    )>,
+) {
+    for comp in query.iter() {
+        // `black_box` keeps the optimizer from discarding the otherwise unused item.
+        black_box(comp);
+    }
+}
+
+/// Benchmark system: visits the query for components `A<0>` through `A<12>` with
+/// `Query::for_each` and hands every item to `black_box`.
+fn for_each(
+    query: Query<(
+        &A<0>,
+        &A<1>,
+        &A<2>,
+        &A<3>,
+        &A<4>,
+        &A<5>,
+        &A<6>,
+        &A<7>,
+        &A<8>,
+        &A<9>,
+        &A<10>,
+        &A<11>,
+        &A<12>,
+    )>,
+) {
+    query.for_each(|comp| {
+        // `black_box` keeps the optimizer from discarding the otherwise unused item.
+        black_box(comp);
+    });
+}
+
+/// Benchmark system: visits the query for components `A<0>` through `A<12>` with
+/// `Query::par_for_each`, using the `ComputeTaskPool` resource, and hands every item to
+/// `black_box`.
+///
+/// The system reads `Res<ComputeTaskPool>`, so that resource must have been inserted into
+/// the world before the system runs.
+fn par_for_each(
+    task_pool: Res<ComputeTaskPool>,
+    query: Query<(
+        &A<0>,
+        &A<1>,
+        &A<2>,
+        &A<3>,
+        &A<4>,
+        &A<5>,
+        &A<6>,
+        &A<7>,
+        &A<8>,
+        &A<9>,
+        &A<10>,
+        &A<11>,
+        &A<12>,
+    )>,
+) {
+    // NOTE(review): the literal 64 is presumably the batch size (entities per task) --
+    // confirm against the `par_for_each` signature.
+    query.par_for_each(&*task_pool, 64, |comp| {
+        // `black_box` keeps the optimizer from discarding the otherwise unused item.
+        black_box(comp);
+    });
+}
+
+/// Builds a fresh `World` together with a parallel `SystemStage` for one benchmark case.
+///
+/// * `parallel` - when `true`, a default `TaskPool` wrapped in `ComputeTaskPool` is inserted
+///   into the world as a resource (the `par_for_each` system reads it).
+/// * `setup` - callback that receives the new stage, e.g. to register systems on it.
+///
+/// Returns the `(world, stage)` pair.
+fn setup(parallel: bool, setup: impl FnOnce(&mut SystemStage)) -> (World, SystemStage) {
+    // Let the caller configure the stage first; this does not touch the world.
+    let mut stage = SystemStage::parallel();
+    setup(&mut stage);
+
+    let mut world = World::new();
+    if parallel {
+        let pool = ComputeTaskPool(TaskPool::default());
+        world.insert_resource(pool);
+    }
+
+    (world, stage)
+}
+
+/// Spawns `count` entities, each with a distinct set of components (and therefore each in
+/// its own archetype).
+///
+/// Every entity receives `A<0>` through `A<12>`, the components requested by the benchmarked
+/// queries. In addition, bit `k` of the loop index `i` decides whether `A<13 + k>` is
+/// inserted, for all 16 bits of the `u16` index. Two different indices therefore never yield
+/// the same component set.
+///
+/// Previously bit 0 was never tested, so indices `2k` and `2k + 1` shared one archetype and
+/// only about `count / 2` distinct archetypes were produced.
+fn add_archetypes(world: &mut World, count: u16) {
+    for i in 0..count {
+        let mut e = world.spawn();
+        // Components common to every entity: these make the archetype match the queries.
+        e.insert(A::<0>(1.0));
+        e.insert(A::<1>(1.0));
+        e.insert(A::<2>(1.0));
+        e.insert(A::<3>(1.0));
+        e.insert(A::<4>(1.0));
+        e.insert(A::<5>(1.0));
+        e.insert(A::<6>(1.0));
+        e.insert(A::<7>(1.0));
+        e.insert(A::<8>(1.0));
+        e.insert(A::<9>(1.0));
+        e.insert(A::<10>(1.0));
+        e.insert(A::<11>(1.0));
+        e.insert(A::<12>(1.0));
+        // Extra components selected by the bits of `i`: bit `k` toggles `A<13 + k>`.
+        if (i & 1) != 0 {
+            e.insert(A::<13>(1.0));
+        }
+        if (i & (1 << 1)) != 0 {
+            e.insert(A::<14>(1.0));
+        }
+        if (i & (1 << 2)) != 0 {
+            e.insert(A::<15>(1.0));
+        }
+        if (i & (1 << 3)) != 0 {
+            e.insert(A::<16>(1.0));
+        }
+        if (i & (1 << 4)) != 0 {
+            e.insert(A::<17>(1.0));
+        }
+        if (i & (1 << 5)) != 0 {
+            e.insert(A::<18>(1.0));
+        }
+        if (i & (1 << 6)) != 0 {
+            e.insert(A::<19>(1.0));
+        }
+        if (i & (1 << 7)) != 0 {
+            e.insert(A::<20>(1.0));
+        }
+        if (i & (1 << 8)) != 0 {
+            e.insert(A::<21>(1.0));
+        }
+        if (i & (1 << 9)) != 0 {
+            e.insert(A::<22>(1.0));
+        }
+        if (i & (1 << 10)) != 0 {
+            e.insert(A::<23>(1.0));
+        }
+        if (i & (1 << 11)) != 0 {
+            e.insert(A::<24>(1.0));
+        }
+        if (i & (1 << 12)) != 0 {
+            e.insert(A::<25>(1.0));
+        }
+        if (i & (1 << 13)) != 0 {
+            e.insert(A::<26>(1.0));
+        }
+        if (i & (1 << 14)) != 0 {
+            e.insert(A::<27>(1.0));
+        }
+        if (i & (1 << 15)) != 0 {
+            e.insert(A::<28>(1.0));
+        }
+    }
+}
+
+/// Spawns one entity carrying exactly the components `A<0>` through `A<12>`, i.e. the set
+/// requested by the benchmarked queries.
+fn spawn_matching_entity(world: &mut World) {
+    let mut e = world.spawn();
+    e.insert(A::<0>(1.0));
+    e.insert(A::<1>(1.0));
+    e.insert(A::<2>(1.0));
+    e.insert(A::<3>(1.0));
+    e.insert(A::<4>(1.0));
+    e.insert(A::<5>(1.0));
+    e.insert(A::<6>(1.0));
+    e.insert(A::<7>(1.0));
+    e.insert(A::<8>(1.0));
+    e.insert(A::<9>(1.0));
+    e.insert(A::<10>(1.0));
+    e.insert(A::<11>(1.0));
+    e.insert(A::<12>(1.0));
+}
+
+/// Criterion entry point for the `empty_archetypes` benchmark group.
+///
+/// For each of the systems `iter`, `for_each` and `par_for_each`, and for each archetype
+/// count in a fixed list, this:
+///
+/// 1. builds a world and a stage containing only that system,
+/// 2. calls `add_archetypes` and then `World::clear_entities`, so the archetypes created by
+///    the spawned entities are left without entities,
+/// 3. spawns a single matching entity,
+/// 4. runs the stage once outside the measurement, and
+/// 5. measures repeated `stage.run` calls under the id `<system name>/<archetype count>`.
+fn empty_archetypes(criterion: &mut Criterion) {
+    let mut group = criterion.benchmark_group("empty_archetypes");
+
+    // The three benchmark variants differ only in their name and in the system registered
+    // on the stage, so they share this body instead of three copies of it.
+    let mut bench_variant = |name: &str, add_system: &dyn Fn(&mut SystemStage)| {
+        for archetype_count in [10, 100, 500, 1000, 2000, 5000, 10000] {
+            // The task pool is inserted for every variant; only `par_for_each` reads it.
+            let (mut world, mut stage) = setup(true, |stage| add_system(stage));
+            add_archetypes(&mut world, archetype_count);
+            world.clear_entities();
+            spawn_matching_entity(&mut world);
+            // One untimed run first -- presumably so that one-time system and query
+            // initialisation is not part of the measurement.
+            stage.run(&mut world);
+            group.bench_with_input(
+                BenchmarkId::new(name, archetype_count),
+                &archetype_count,
+                |bencher, _| bencher.iter(|| stage.run(&mut world)),
+            );
+        }
+    };
+
+    bench_variant("iter", &|stage: &mut SystemStage| {
+        stage.add_system(iter);
+    });
+    bench_variant("for_each", &|stage: &mut SystemStage| {
+        stage.add_system(for_each);
+    });
+    bench_variant("par_for_each", &|stage: &mut SystemStage| {
+        stage.add_system(par_for_each);
+    });
+
+    // Finish the group explicitly rather than relying on its `Drop` implementation.
+    group.finish();
+}
--- a/crates/bevy_ecs/src/query/state.rs
+++ b/crates/bevy_ecs/src/query/state.rs
@ -983,6 +983,10 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
                let tables = &world.storages().tables;
                for table_id in &self.matched_table_ids {
                    let table = &tables[*table_id];
+                    if table.is_empty() {
+                        continue;
+                    }
+
                    let mut offset = 0;
                    while offset < table.entity_count() {
                        let func = func.clone();
@ -1030,6 +1034,10 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
                for archetype_id in &self.matched_archetype_ids {
                    let mut offset = 0;
                    let archetype = &archetypes[*archetype_id];
+                    if archetype.is_empty() {
+                        continue;
+                    }
+
                    while offset < archetype.len() {
                        let func = func.clone();
                        let len = batch_size.min(archetype.len() - offset);