Opportunistically use dense iteration for archetypal iteration (#14049)

# Objective
- Currently, Bevy employs sparse iteration if any of the target
components in the query is stored in a sparse set. This may lead to
increased cache misses in some cases, potentially impacting performance.
- Partially fixes #12381

## Solution

- use dense iteration when an archetype and its table have the same
entity count.
- To avoid introducing complicated unsafe noise, this PR only implements
it for `for_each`-style iteration.
- added a benchmark to test performance for hybrid iteration.


## Performance


![image](https://github.com/bevyengine/bevy/assets/45868716/5cce13cf-6ff2-4861-9576-e75edc63bd46)

nearly 2x win in specific scenarios, and no performance degradation in
other test cases.

---------

Co-authored-by: Alice Cecile <alice.i.cecile@gmail.com>
Co-authored-by: Christian Hughes <9044780+ItsDoot@users.noreply.github.com>
This commit is contained in:
re0312 2024-08-03 05:18:15 +08:00 committed by GitHub
parent 7c80ae7313
commit 8235daaea0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 129 additions and 2 deletions

View file

@ -0,0 +1,43 @@
use bevy_ecs::prelude::*;
use rand::{prelude::SliceRandom, SeedableRng};
use rand_chacha::ChaCha8Rng;
/// Benchmark component carrying a single `f32`.
///
/// No `#[component(storage = ...)]` attribute is given, so this uses the
/// derive's default storage (presumably table storage, as the name suggests —
/// confirm against the `bevy_ecs` `Component` derive documentation).
#[derive(Component, Copy, Clone)]
struct TableData(f32);
/// Benchmark component carrying a single `f32`, explicitly stored in a
/// sparse set via the `storage = "SparseSet"` attribute.
#[derive(Component, Copy, Clone)]
#[component(storage = "SparseSet")]
struct SparseData(f32);
/// Builds a [`ChaCha8Rng`] from a fixed seed so that every benchmark run
/// produces the same shuffle order.
fn deterministic_rand() -> ChaCha8Rng {
    let seed: u64 = 42;
    ChaCha8Rng::seed_from_u64(seed)
}
/// Benchmark fixture: a [`World`] together with a cached query over
/// `(&mut TableData, &SparseData)`.
pub struct Benchmark<'w>(World, QueryState<(&'w mut TableData, &'w SparseData)>);

impl<'w> Benchmark<'w> {
    /// Builds the world used by the benchmark.
    ///
    /// 10000 times over, one entity with `(TableData, SparseData)` and one
    /// entity with only `TableData` are spawned, in that order. Afterwards all
    /// of the `TableData`-only entities are despawned in a shuffled
    /// (but deterministic) order.
    pub fn new() -> Self {
        let mut world = World::new();

        // Interleave the two kinds of entities; only the ids of the
        // `TableData`-only ones are kept, since those get despawned below.
        let mut table_only: Vec<_> = (0..10000)
            .map(|_| {
                world.spawn((TableData(0.0), SparseData(0.0)));
                world.spawn(TableData(0.)).id()
            })
            .collect();

        // Despawning in a shuffled order is meant to make the archetype's
        // iteration order deviate significantly from the table order, which
        // maximizes the loss of cache locality during archetype-based iteration.
        let mut rng = deterministic_rand();
        table_only.shuffle(&mut rng);
        for entity in table_only {
            world.entity_mut(entity).despawn();
        }

        let query = world.query::<(&mut TableData, &SparseData)>();
        Self(world, query)
    }

    /// Runs one benchmark iteration: adds each entity's `SparseData` value to
    /// its `TableData` value.
    ///
    /// `for_each` is used on purpose — this benchmark measures the
    /// `for_each`-style iteration path, so it must not be turned into a
    /// `for` loop.
    #[inline(never)]
    pub fn run(&mut self) {
        let Self(world, query) = self;
        query.iter_mut(world).for_each(|(mut table_data, sparse_data)| {
            table_data.0 += sparse_data.0;
        });
    }
}

View file

@ -11,6 +11,7 @@ mod iter_frag_wide;
mod iter_frag_wide_sparse;
mod iter_simple;
mod iter_simple_foreach;
mod iter_simple_foreach_hybrid;
mod iter_simple_foreach_sparse_set;
mod iter_simple_foreach_wide;
mod iter_simple_foreach_wide_sparse_set;
@ -71,6 +72,10 @@ fn iter_simple(c: &mut Criterion) {
let mut bench = iter_simple_foreach_wide_sparse_set::Benchmark::new();
b.iter(move || bench.run());
});
group.bench_function("foreach_hybrid", |b| {
let mut bench = iter_simple_foreach_hybrid::Benchmark::new();
b.iter(move || bench.run());
});
group.finish();
}

View file

@ -167,6 +167,70 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> {
accum
}
/// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
/// from an archetype which has the same entity count as its table.
///
/// Each value in `rows` is used directly as a table row: the entity is read
/// from `table.entities()` at that index and the index is converted to a
/// [`TableRow`] before being handed to `F::filter_fetch` and `D::fetch`.
/// Rows rejected by the filter are skipped; every other item is folded into
/// `accum` via `func`.
///
/// # Panics
/// Panics if `rows.end` is greater than `u32::MAX`.
///
/// # Safety
/// - all `rows` must be in `[0, archetype.len())`.
/// - `archetype` must match D and F
/// - `archetype` must have the same length as its table.
/// - Either `D::IS_DENSE` or `F::IS_DENSE` must be false.
#[inline]
pub(super) unsafe fn fold_over_dense_archetype_range<B, Func>(
    &mut self,
    mut accum: B,
    func: &mut Func,
    archetype: &'w Archetype,
    rows: Range<usize>,
) -> B
where
    Func: FnMut(B, D::Item<'w>) -> B,
{
    // Checked up front so the per-row `TableRow::from_usize` conversion below
    // never sees an out-of-range value.
    assert!(
        rows.end <= u32::MAX as usize,
        "TableRow is only valid up to u32::MAX"
    );

    // SAFETY: Caller assures `archetype` is a matched archetype, and matched
    // table IDs are guaranteed to still exist.
    let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
    debug_assert!(
        archetype.len() == table.entity_count(),
        "archetype and it's table must have the same length. "
    );

    // SAFETY: Caller assures `archetype` matches D, and `table` is the table
    // looked up from `archetype.table_id()`.
    unsafe {
        D::set_archetype(
            &mut self.cursor.fetch,
            &self.query_state.fetch_state,
            archetype,
            table,
        );
    }
    // SAFETY: Caller assures `archetype` matches F, and `table` is the table
    // looked up from `archetype.table_id()`.
    unsafe {
        F::set_archetype(
            &mut self.cursor.filter,
            &self.query_state.filter_state,
            archetype,
            table,
        );
    }

    let entities = table.entities();
    for row in rows {
        // SAFETY: Caller assures `row` is in range of the current archetype,
        // and the archetype has the same length as its table.
        let entity = unsafe { *entities.get_unchecked(row) };
        let row = TableRow::from_usize(row);

        // SAFETY: set_archetype was called prior.
        // Caller assures `row` in range of the current archetype.
        let filter_matched = unsafe { F::filter_fetch(&mut self.cursor.filter, entity, row) };
        if !filter_matched {
            continue;
        }

        // SAFETY: set_archetype was called prior.
        // Caller assures `row` in range of the current archetype.
        let item = unsafe { D::fetch(&mut self.cursor.fetch, entity, row) };

        accum = func(accum, item);
    }
    accum
}
/// Sorts all query items into a new iterator, using the query lens as a key.
///
/// This sort is stable (i.e., does not reorder equal elements).
@ -914,12 +978,27 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F>
let archetype =
// SAFETY: Matched archetype IDs are guaranteed to still exist.
unsafe { self.archetypes.get(id.archetype_id).debug_checked_unwrap() };
accum =
// SAFETY: Matched table IDs are guaranteed to still exist.
let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
// When an archetype and its table have equal entity counts, dense iteration can be safely used.
// this leverages cache locality to optimize performance.
if table.entity_count() == archetype.len() {
accum =
// SAFETY:
// - The fetched archetype matches both D and F
// - The provided archetype and its' table have the same length.
// - The provided range is equivalent to [0, archetype.len)
// - The if block ensures that ether D::IS_DENSE or F::IS_DENSE are false
unsafe { self.fold_over_dense_archetype_range(accum, &mut func, archetype,0..archetype.len()) };
} else {
accum =
// SAFETY:
// - The fetched archetype matches both D and F
// - The provided range is equivalent to [0, archetype.len)
// - The if block ensures that ether D::IS_DENSE or F::IS_DENSE are false
unsafe { self.fold_over_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
unsafe { self.fold_over_archetype_range(accum, &mut func, archetype,0..archetype.len()) };
}
}
}
accum