mirror of
https://github.com/bevyengine/bevy
synced 2024-11-10 07:04:33 +00:00
Opportunistically use dense iteration for archetypal iteration (#14049)
# Objective - currently, bevy employs sparse iteration if any of the target components in the query are stored in a sparse set. it may lead to increased cache misses in some cases, potentially impacting performance. - partial fixes #12381 ## Solution - use dense iteration when an archetype and its table have the same entity count. - to avoid introducing complicate unsafe noise, this pr only implement for `for_each ` style iteration. - added a benchmark to test performance for hybrid iteration. ## Performance ![image](https://github.com/bevyengine/bevy/assets/45868716/5cce13cf-6ff2-4861-9576-e75edc63bd46) nearly 2x win in specific scenarios, and no performance degradation in other test cases. --------- Co-authored-by: Alice Cecile <alice.i.cecile@gmail.com> Co-authored-by: Christian Hughes <9044780+ItsDoot@users.noreply.github.com>
This commit is contained in:
parent
7c80ae7313
commit
8235daaea0
3 changed files with 129 additions and 2 deletions
|
@ -0,0 +1,43 @@
|
|||
use bevy_ecs::prelude::*;
|
||||
use rand::{prelude::SliceRandom, SeedableRng};
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
|
||||
#[derive(Component, Copy, Clone)]
|
||||
struct TableData(f32);
|
||||
|
||||
#[derive(Component, Copy, Clone)]
|
||||
#[component(storage = "SparseSet")]
|
||||
struct SparseData(f32);
|
||||
|
||||
fn deterministic_rand() -> ChaCha8Rng {
|
||||
ChaCha8Rng::seed_from_u64(42)
|
||||
}
|
||||
pub struct Benchmark<'w>(World, QueryState<(&'w mut TableData, &'w SparseData)>);
|
||||
|
||||
impl<'w> Benchmark<'w> {
|
||||
pub fn new() -> Self {
|
||||
let mut world = World::new();
|
||||
|
||||
let mut v = vec![];
|
||||
for _ in 0..10000 {
|
||||
world.spawn((TableData(0.0), SparseData(0.0))).id();
|
||||
v.push(world.spawn(TableData(0.)).id());
|
||||
}
|
||||
|
||||
// by shuffling ,randomize the archetype iteration order to significantly deviate from the table order. This maximizes the loss of cache locality during archetype-based iteration.
|
||||
v.shuffle(&mut deterministic_rand());
|
||||
for e in v.into_iter() {
|
||||
world.entity_mut(e).despawn();
|
||||
}
|
||||
|
||||
let query = world.query::<(&mut TableData, &SparseData)>();
|
||||
Self(world, query)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub fn run(&mut self) {
|
||||
self.1
|
||||
.iter_mut(&mut self.0)
|
||||
.for_each(|(mut v1, v2)| v1.0 += v2.0)
|
||||
}
|
||||
}
|
|
@ -11,6 +11,7 @@ mod iter_frag_wide;
|
|||
mod iter_frag_wide_sparse;
|
||||
mod iter_simple;
|
||||
mod iter_simple_foreach;
|
||||
mod iter_simple_foreach_hybrid;
|
||||
mod iter_simple_foreach_sparse_set;
|
||||
mod iter_simple_foreach_wide;
|
||||
mod iter_simple_foreach_wide_sparse_set;
|
||||
|
@ -71,6 +72,10 @@ fn iter_simple(c: &mut Criterion) {
|
|||
let mut bench = iter_simple_foreach_wide_sparse_set::Benchmark::new();
|
||||
b.iter(move || bench.run());
|
||||
});
|
||||
group.bench_function("foreach_hybrid", |b| {
|
||||
let mut bench = iter_simple_foreach_hybrid::Benchmark::new();
|
||||
b.iter(move || bench.run());
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
|
|
|
@ -167,6 +167,70 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> {
|
|||
accum
|
||||
}
|
||||
|
||||
/// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
|
||||
/// from an archetype which has the same entity count as its table.
|
||||
///
|
||||
/// # Safety
|
||||
/// - all `indices` must be in `[0, archetype.len())`.
|
||||
/// - `archetype` must match D and F
|
||||
/// - `archetype` must have the same length with it's table.
|
||||
/// - Either `D::IS_DENSE` or `F::IS_DENSE` must be false.
|
||||
#[inline]
|
||||
pub(super) unsafe fn fold_over_dense_archetype_range<B, Func>(
|
||||
&mut self,
|
||||
mut accum: B,
|
||||
func: &mut Func,
|
||||
archetype: &'w Archetype,
|
||||
rows: Range<usize>,
|
||||
) -> B
|
||||
where
|
||||
Func: FnMut(B, D::Item<'w>) -> B,
|
||||
{
|
||||
assert!(
|
||||
rows.end <= u32::MAX as usize,
|
||||
"TableRow is only valid up to u32::MAX"
|
||||
);
|
||||
let table = self.tables.get(archetype.table_id()).debug_checked_unwrap();
|
||||
|
||||
debug_assert!(
|
||||
archetype.len() == table.entity_count(),
|
||||
"archetype and it's table must have the same length. "
|
||||
);
|
||||
|
||||
D::set_archetype(
|
||||
&mut self.cursor.fetch,
|
||||
&self.query_state.fetch_state,
|
||||
archetype,
|
||||
table,
|
||||
);
|
||||
F::set_archetype(
|
||||
&mut self.cursor.filter,
|
||||
&self.query_state.filter_state,
|
||||
archetype,
|
||||
table,
|
||||
);
|
||||
let entities = table.entities();
|
||||
for row in rows {
|
||||
// SAFETY: Caller assures `row` in range of the current archetype.
|
||||
let entity = unsafe { *entities.get_unchecked(row) };
|
||||
let row = TableRow::from_usize(row);
|
||||
|
||||
// SAFETY: set_table was called prior.
|
||||
// Caller assures `row` in range of the current archetype.
|
||||
let filter_matched = unsafe { F::filter_fetch(&mut self.cursor.filter, entity, row) };
|
||||
if !filter_matched {
|
||||
continue;
|
||||
}
|
||||
|
||||
// SAFETY: set_table was called prior.
|
||||
// Caller assures `row` in range of the current archetype.
|
||||
let item = D::fetch(&mut self.cursor.fetch, entity, row);
|
||||
|
||||
accum = func(accum, item);
|
||||
}
|
||||
accum
|
||||
}
|
||||
|
||||
/// Sorts all query items into a new iterator, using the query lens as a key.
|
||||
///
|
||||
/// This sort is stable (i.e., does not reorder equal elements).
|
||||
|
@ -914,12 +978,27 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F>
|
|||
let archetype =
|
||||
// SAFETY: Matched archetype IDs are guaranteed to still exist.
|
||||
unsafe { self.archetypes.get(id.archetype_id).debug_checked_unwrap() };
|
||||
accum =
|
||||
// SAFETY: Matched table IDs are guaranteed to still exist.
|
||||
let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
|
||||
|
||||
// When an archetype and its table have equal entity counts, dense iteration can be safely used.
|
||||
// this leverages cache locality to optimize performance.
|
||||
if table.entity_count() == archetype.len() {
|
||||
accum =
|
||||
// SAFETY:
|
||||
// - The fetched archetype matches both D and F
|
||||
// - The provided archetype and its' table have the same length.
|
||||
// - The provided range is equivalent to [0, archetype.len)
|
||||
// - The if block ensures that ether D::IS_DENSE or F::IS_DENSE are false
|
||||
unsafe { self.fold_over_dense_archetype_range(accum, &mut func, archetype,0..archetype.len()) };
|
||||
} else {
|
||||
accum =
|
||||
// SAFETY:
|
||||
// - The fetched archetype matches both D and F
|
||||
// - The provided range is equivalent to [0, archetype.len)
|
||||
// - The if block ensures that ether D::IS_DENSE or F::IS_DENSE are false
|
||||
unsafe { self.fold_over_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
|
||||
unsafe { self.fold_over_archetype_range(accum, &mut func, archetype,0..archetype.len()) };
|
||||
}
|
||||
}
|
||||
}
|
||||
accum
|
||||
|
|
Loading…
Reference in a new issue