Skip empty archetypes and tables when iterating over queries (#4724)

# Objective
Speed up queries that are fragmented over many empty archetypes and tables.

## Solution
Add an early-out that checks whether the table or archetype is empty before iterating over it. This adds an extra branch for every archetype matched, but skips setting the archetype/table to the underlying state and any iteration over it.

This may not be worth it for the default `Query::iter` and maybe even the `Query::for_each` implementations, but this definitely avoids scheduling unnecessary tasks in the `Query::par_for_each` case.

Ideally, `matched_archetypes` should only contain archetypes where there's actually work to do, but this would add an `O(n)` flat cost to every call to `update_archetypes` that scales with the number of matched archetypes.

TODO: Benchmark
This commit is contained in:
James Liu 2022-10-24 13:22:05 +00:00
parent 7989cb2650
commit b508b5c7c7
2 changed files with 261 additions and 0 deletions

View file

@ -0,0 +1,253 @@
use bevy_ecs::{
component::Component,
prelude::*,
schedule::{Stage, SystemStage},
world::World,
};
use bevy_tasks::{ComputeTaskPool, TaskPool};
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
// Declare the `benches` group with `empty_archetypes` as its only target, then pass
// that group to `criterion_main!`, which provides the benchmark binary's entry point.
criterion_group!(benches, empty_archetypes);
criterion_main!(benches);
/// Component wrapping a single `f32`, parameterised by a `u16` const generic.
///
/// Each value of `N` names a separate type, so `A<0>`, `A<1>`, ... can be inserted
/// on the same entity as independent components.
#[derive(Component)]
struct A<const N: u16>(f32);
/// Benchmark system: walks every item matched by a read-only query over the
/// thirteen components `A<0>` through `A<12>` with `Query::iter` in a `for` loop.
fn iter(
query: Query<(
&A<0>,
&A<1>,
&A<2>,
&A<3>,
&A<4>,
&A<5>,
&A<6>,
&A<7>,
&A<8>,
&A<9>,
&A<10>,
&A<11>,
&A<12>,
)>,
) {
for comp in query.iter() {
// Route each item through `black_box` so the loop body is not optimised away.
black_box(comp);
}
}
/// Benchmark system: visits every item matched by a read-only query over the
/// thirteen components `A<0>` through `A<12>` with the closure-based
/// `Query::for_each`.
fn for_each(
query: Query<(
&A<0>,
&A<1>,
&A<2>,
&A<3>,
&A<4>,
&A<5>,
&A<6>,
&A<7>,
&A<8>,
&A<9>,
&A<10>,
&A<11>,
&A<12>,
)>,
) {
query.for_each(|comp| {
// Route each item through `black_box` so the closure body is not optimised away.
black_box(comp);
});
}
/// Benchmark system: visits every item matched by a read-only query over the
/// thirteen components `A<0>` through `A<12>` with `Query::par_for_each`.
///
/// Requires the `ComputeTaskPool` resource to be present in the world, because it
/// is requested as a `Res` parameter and handed to `par_for_each`.
fn par_for_each(
task_pool: Res<ComputeTaskPool>,
query: Query<(
&A<0>,
&A<1>,
&A<2>,
&A<3>,
&A<4>,
&A<5>,
&A<6>,
&A<7>,
&A<8>,
&A<9>,
&A<10>,
&A<11>,
&A<12>,
)>,
) {
// NOTE(review): `64` is presumably the batch size handed to each task; confirm
// against the `Query::par_for_each` signature.
query.par_for_each(&*task_pool, 64, |comp| {
// Route each item through `black_box` so the closure body is not optimised away.
black_box(comp);
});
}
/// Builds the `World` / `SystemStage` pair used by one benchmark case.
///
/// * `parallel` - when `true`, a `ComputeTaskPool` wrapping a default `TaskPool`
///   is inserted into the world as a resource.
/// * `setup` - callback invoked once with the freshly created parallel stage, so
///   the caller can register the systems to run.
///
/// Returns the world and the configured stage.
fn setup(parallel: bool, setup: impl FnOnce(&mut SystemStage)) -> (World, SystemStage) {
    let world = {
        let mut fresh_world = World::new();
        if parallel {
            fresh_world.insert_resource(ComputeTaskPool(TaskPool::default()));
        }
        fresh_world
    };

    let stage = {
        let mut configured = SystemStage::parallel();
        // `setup` here is the callback parameter, which shadows this function's name.
        setup(&mut configured);
        configured
    };

    (world, stage)
}
/// Spawns `count` entities, each with a distinct set of components.
///
/// Every entity receives `A<0>` through `A<12>`, the components the benchmark
/// queries read. The sixteen bits of the loop index then decide which of the
/// sixteen extra components `A<13>` through `A<28>` are added: bit `k` selects
/// `A<13 + k>`. Distinct indices differ in at least one bit, so no two entities
/// share a component set for any `u16` value of `count`.
fn add_archetypes(world: &mut World, count: u16) {
    for i in 0..count {
        let mut e = world.spawn();

        // Components shared by every entity.
        e.insert(A::<0>(1.0));
        e.insert(A::<1>(1.0));
        e.insert(A::<2>(1.0));
        e.insert(A::<3>(1.0));
        e.insert(A::<4>(1.0));
        e.insert(A::<5>(1.0));
        e.insert(A::<6>(1.0));
        e.insert(A::<7>(1.0));
        e.insert(A::<8>(1.0));
        e.insert(A::<9>(1.0));
        e.insert(A::<10>(1.0));
        e.insert(A::<11>(1.0));
        e.insert(A::<12>(1.0));

        // Optional components, one per bit of `i`. The lowest bit must be included:
        // if it were skipped, indices `2k` and `2k + 1` would get the same set.
        if i & 1 != 0 {
            e.insert(A::<13>(1.0));
        }
        if i & (1 << 1) != 0 {
            e.insert(A::<14>(1.0));
        }
        if i & (1 << 2) != 0 {
            e.insert(A::<15>(1.0));
        }
        if i & (1 << 3) != 0 {
            e.insert(A::<16>(1.0));
        }
        if i & (1 << 4) != 0 {
            e.insert(A::<17>(1.0));
        }
        if i & (1 << 5) != 0 {
            e.insert(A::<18>(1.0));
        }
        if i & (1 << 6) != 0 {
            e.insert(A::<19>(1.0));
        }
        if i & (1 << 7) != 0 {
            e.insert(A::<20>(1.0));
        }
        if i & (1 << 8) != 0 {
            e.insert(A::<21>(1.0));
        }
        if i & (1 << 9) != 0 {
            e.insert(A::<22>(1.0));
        }
        if i & (1 << 10) != 0 {
            e.insert(A::<23>(1.0));
        }
        if i & (1 << 11) != 0 {
            e.insert(A::<24>(1.0));
        }
        if i & (1 << 12) != 0 {
            e.insert(A::<25>(1.0));
        }
        if i & (1 << 13) != 0 {
            e.insert(A::<26>(1.0));
        }
        if i & (1 << 14) != 0 {
            e.insert(A::<27>(1.0));
        }
        if i & (1 << 15) != 0 {
            e.insert(A::<28>(1.0));
        }
    }
}
fn empty_archetypes(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("empty_archetypes");
for archetype_count in [10, 100, 500, 1000, 2000, 5000, 10000] {
let (mut world, mut stage) = setup(true, |stage| {
stage.add_system(iter);
});
add_archetypes(&mut world, archetype_count);
world.clear_entities();
let mut e = world.spawn();
e.insert(A::<0>(1.0));
e.insert(A::<1>(1.0));
e.insert(A::<2>(1.0));
e.insert(A::<3>(1.0));
e.insert(A::<4>(1.0));
e.insert(A::<5>(1.0));
e.insert(A::<6>(1.0));
e.insert(A::<7>(1.0));
e.insert(A::<8>(1.0));
e.insert(A::<9>(1.0));
e.insert(A::<10>(1.0));
e.insert(A::<11>(1.0));
e.insert(A::<12>(1.0));
stage.run(&mut world);
group.bench_with_input(
BenchmarkId::new("iter", archetype_count),
&archetype_count,
|bencher, &_| {
bencher.iter(|| {
stage.run(&mut world);
})
},
);
}
for archetype_count in [10, 100, 500, 1000, 2000, 5000, 10000] {
let (mut world, mut stage) = setup(true, |stage| {
stage.add_system(for_each);
});
add_archetypes(&mut world, archetype_count);
world.clear_entities();
let mut e = world.spawn();
e.insert(A::<0>(1.0));
e.insert(A::<1>(1.0));
e.insert(A::<2>(1.0));
e.insert(A::<3>(1.0));
e.insert(A::<4>(1.0));
e.insert(A::<5>(1.0));
e.insert(A::<6>(1.0));
e.insert(A::<7>(1.0));
e.insert(A::<8>(1.0));
e.insert(A::<9>(1.0));
e.insert(A::<10>(1.0));
e.insert(A::<11>(1.0));
e.insert(A::<12>(1.0));
stage.run(&mut world);
group.bench_with_input(
BenchmarkId::new("for_each", archetype_count),
&archetype_count,
|bencher, &_| {
bencher.iter(|| {
stage.run(&mut world);
})
},
);
}
for archetype_count in [10, 100, 500, 1000, 2000, 5000, 10000] {
let (mut world, mut stage) = setup(true, |stage| {
stage.add_system(par_for_each);
});
add_archetypes(&mut world, archetype_count);
world.clear_entities();
let mut e = world.spawn();
e.insert(A::<0>(1.0));
e.insert(A::<1>(1.0));
e.insert(A::<2>(1.0));
e.insert(A::<3>(1.0));
e.insert(A::<4>(1.0));
e.insert(A::<5>(1.0));
e.insert(A::<6>(1.0));
e.insert(A::<7>(1.0));
e.insert(A::<8>(1.0));
e.insert(A::<9>(1.0));
e.insert(A::<10>(1.0));
e.insert(A::<11>(1.0));
e.insert(A::<12>(1.0));
stage.run(&mut world);
group.bench_with_input(
BenchmarkId::new("par_for_each", archetype_count),
&archetype_count,
|bencher, &_| {
bencher.iter(|| {
stage.run(&mut world);
})
},
);
}
}

View file

@ -983,6 +983,10 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
let tables = &world.storages().tables;
for table_id in &self.matched_table_ids {
let table = &tables[*table_id];
if table.is_empty() {
continue;
}
let mut offset = 0;
while offset < table.entity_count() {
let func = func.clone();
@ -1030,6 +1034,10 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
for archetype_id in &self.matched_archetype_ids {
let mut offset = 0;
let archetype = &archetypes[*archetype_id];
if archetype.is_empty() {
continue;
}
while offset < archetype.len() {
let func = func.clone();
let len = batch_size.min(archetype.len() - offset);