mirror of
https://github.com/bevyengine/bevy
synced 2024-11-22 12:43:34 +00:00
Opportunistically use dense iter for archetypal iteration in Par_iter (#14673)
# Objective - Follow-up to #14049: we can apply the same optimization to our parallel iterator. This PR also unifies the function used by both regular and parallel iteration. ## Performance ![image](https://github.com/user-attachments/assets/cba700bc-169c-4b58-b504-823bdca8ec05) No performance regression for regular iteration. 3.5x faster in hybrid parallel iteration; this number is far greater than the benefit obtained in regular iteration (~1.81x) because mutable iteration over contiguous memory can effectively reduce the cost of maintaining core cache coherence.
This commit is contained in:
parent
ec728c31c1
commit
739007f148
5 changed files with 121 additions and 76 deletions
|
@ -20,7 +20,7 @@ impl<'w> Benchmark<'w> {
|
||||||
|
|
||||||
let mut v = vec![];
|
let mut v = vec![];
|
||||||
for _ in 0..10000 {
|
for _ in 0..10000 {
|
||||||
world.spawn((TableData(0.0), SparseData(0.0))).id();
|
world.spawn((TableData(0.0), SparseData(0.0)));
|
||||||
v.push(world.spawn(TableData(0.)).id());
|
v.push(world.spawn(TableData(0.)).id());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ mod iter_simple_system;
|
||||||
mod iter_simple_wide;
|
mod iter_simple_wide;
|
||||||
mod iter_simple_wide_sparse_set;
|
mod iter_simple_wide_sparse_set;
|
||||||
mod par_iter_simple;
|
mod par_iter_simple;
|
||||||
|
mod par_iter_simple_foreach_hybrid;
|
||||||
|
|
||||||
use heavy_compute::*;
|
use heavy_compute::*;
|
||||||
|
|
||||||
|
@ -135,4 +136,8 @@ fn par_iter_simple(c: &mut Criterion) {
|
||||||
b.iter(move || bench.run());
|
b.iter(move || bench.run());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
group.bench_function(format!("hybrid"), |b| {
|
||||||
|
let mut bench = par_iter_simple_foreach_hybrid::Benchmark::new();
|
||||||
|
b.iter(move || bench.run());
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
use bevy_ecs::prelude::*;
|
||||||
|
use bevy_tasks::{ComputeTaskPool, TaskPool};
|
||||||
|
use rand::{prelude::SliceRandom, SeedableRng};
|
||||||
|
use rand_chacha::ChaCha8Rng;
|
||||||
|
|
||||||
|
#[derive(Component, Copy, Clone)]
|
||||||
|
struct TableData(f32);
|
||||||
|
|
||||||
|
#[derive(Component, Copy, Clone)]
|
||||||
|
#[component(storage = "SparseSet")]
|
||||||
|
struct SparseData(f32);
|
||||||
|
|
||||||
|
fn deterministic_rand() -> ChaCha8Rng {
|
||||||
|
ChaCha8Rng::seed_from_u64(42)
|
||||||
|
}
|
||||||
|
pub struct Benchmark<'w>(World, QueryState<(&'w mut TableData, &'w SparseData)>);
|
||||||
|
|
||||||
|
impl<'w> Benchmark<'w> {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let mut world = World::new();
|
||||||
|
ComputeTaskPool::get_or_init(TaskPool::default);
|
||||||
|
|
||||||
|
let mut v = vec![];
|
||||||
|
for _ in 0..100000 {
|
||||||
|
world.spawn((TableData(0.0), SparseData(0.0)));
|
||||||
|
v.push(world.spawn(TableData(0.)).id());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shuffle to randomize the archetype iteration order so that it deviates significantly from the table order. This maximizes the loss of cache locality during archetype-based iteration.
|
||||||
|
v.shuffle(&mut deterministic_rand());
|
||||||
|
for e in v.into_iter() {
|
||||||
|
world.entity_mut(e).despawn();
|
||||||
|
}
|
||||||
|
|
||||||
|
let query = world.query::<(&mut TableData, &SparseData)>();
|
||||||
|
Self(world, query)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(never)]
|
||||||
|
pub fn run(&mut self) {
|
||||||
|
self.1
|
||||||
|
.par_iter_mut(&mut self.0)
|
||||||
|
.for_each(|(mut v1, v2)| v1.0 += v2.0)
|
||||||
|
}
|
||||||
|
}
|
|
@ -122,6 +122,67 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
|
||||||
|
/// from a storage.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
/// - `range` must be in `[0, storage::entity_count)` or None.
|
||||||
|
#[inline]
|
||||||
|
pub(super) unsafe fn fold_over_storage_range<B, Func>(
|
||||||
|
&mut self,
|
||||||
|
mut accum: B,
|
||||||
|
func: &mut Func,
|
||||||
|
storage: StorageId,
|
||||||
|
range: Option<Range<usize>>,
|
||||||
|
) -> B
|
||||||
|
where
|
||||||
|
Func: FnMut(B, D::Item<'w>) -> B,
|
||||||
|
{
|
||||||
|
if self.cursor.is_dense {
|
||||||
|
// SAFETY: `self.cursor.is_dense` is true, so storage ids are guaranteed to be table ids.
|
||||||
|
let table_id = unsafe { storage.table_id };
|
||||||
|
// SAFETY: Matched table IDs are guaranteed to still exist.
|
||||||
|
let table = unsafe { self.tables.get(table_id).debug_checked_unwrap() };
|
||||||
|
|
||||||
|
let range = range.unwrap_or(0..table.entity_count());
|
||||||
|
accum =
|
||||||
|
// SAFETY:
|
||||||
|
// - The fetched table matches both D and F
|
||||||
|
// - caller ensures `range` is within `[0, table.entity_count)`
|
||||||
|
// - The if block ensures that the query iteration is dense
|
||||||
|
unsafe { self.fold_over_table_range(accum, func, table, range) };
|
||||||
|
} else {
|
||||||
|
// SAFETY: `self.cursor.is_dense` is false, so storage ids are guaranteed to be archetype ids.
|
||||||
|
let archetype_id = unsafe { storage.archetype_id };
|
||||||
|
// SAFETY: Matched archetype IDs are guaranteed to still exist.
|
||||||
|
let archetype = unsafe { self.archetypes.get(archetype_id).debug_checked_unwrap() };
|
||||||
|
// SAFETY: Matched table IDs are guaranteed to still exist.
|
||||||
|
let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
|
||||||
|
|
||||||
|
let range = range.unwrap_or(0..archetype.len());
|
||||||
|
|
||||||
|
// When an archetype and its table have equal entity counts, dense iteration can be safely used.
|
||||||
|
// this leverages cache locality to optimize performance.
|
||||||
|
if table.entity_count() == archetype.len() {
|
||||||
|
accum =
|
||||||
|
// SAFETY:
|
||||||
|
// - The fetched archetype matches both D and F
|
||||||
|
// - The provided archetype and its' table have the same length.
|
||||||
|
// - caller ensures `range` is within `[0, archetype.len)`
|
||||||
|
// - The if block ensures that the query iteration is not dense.
|
||||||
|
unsafe { self.fold_over_dense_archetype_range(accum, func, archetype,range) };
|
||||||
|
} else {
|
||||||
|
accum =
|
||||||
|
// SAFETY:
|
||||||
|
// - The fetched archetype matches both D and F
|
||||||
|
// - caller ensures `range` is within `[0, archetype.len)`
|
||||||
|
// - The if block ensures that the query iteration is not dense.
|
||||||
|
unsafe { self.fold_over_archetype_range(accum, func, archetype,range) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
accum
|
||||||
|
}
|
||||||
|
|
||||||
/// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
|
/// Executes the equivalent of [`Iterator::fold`] over a contiguous segment
|
||||||
/// from an table.
|
/// from an table.
|
||||||
///
|
///
|
||||||
|
@ -143,7 +204,7 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> {
|
||||||
if table.is_empty() {
|
if table.is_empty() {
|
||||||
return accum;
|
return accum;
|
||||||
}
|
}
|
||||||
assert!(
|
debug_assert!(
|
||||||
rows.end <= u32::MAX as usize,
|
rows.end <= u32::MAX as usize,
|
||||||
"TableRow is only valid up to u32::MAX"
|
"TableRow is only valid up to u32::MAX"
|
||||||
);
|
);
|
||||||
|
@ -267,12 +328,11 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIter<'w, 's, D, F> {
|
||||||
if archetype.is_empty() {
|
if archetype.is_empty() {
|
||||||
return accum;
|
return accum;
|
||||||
}
|
}
|
||||||
assert!(
|
debug_assert!(
|
||||||
rows.end <= u32::MAX as usize,
|
rows.end <= u32::MAX as usize,
|
||||||
"TableRow is only valid up to u32::MAX"
|
"TableRow is only valid up to u32::MAX"
|
||||||
);
|
);
|
||||||
let table = self.tables.get(archetype.table_id()).debug_checked_unwrap();
|
let table = self.tables.get(archetype.table_id()).debug_checked_unwrap();
|
||||||
|
|
||||||
debug_assert!(
|
debug_assert!(
|
||||||
archetype.len() == table.entity_count(),
|
archetype.len() == table.entity_count(),
|
||||||
"archetype and it's table must have the same length. "
|
"archetype and it's table must have the same length. "
|
||||||
|
@ -1032,48 +1092,10 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F>
|
||||||
accum = func(accum, item);
|
accum = func(accum, item);
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.cursor.is_dense {
|
for id in self.cursor.storage_id_iter.clone().copied() {
|
||||||
for id in self.cursor.storage_id_iter.clone() {
|
// SAFETY:
|
||||||
// SAFETY: `self.cursor.is_dense` is true, so storage ids are guaranteed to be table ids.
|
// - The range(None) is equivalent to [0, storage.entity_count)
|
||||||
let table_id = unsafe { id.table_id };
|
accum = unsafe { self.fold_over_storage_range(accum, &mut func, id, None) };
|
||||||
// SAFETY: Matched table IDs are guaranteed to still exist.
|
|
||||||
let table = unsafe { self.tables.get(table_id).debug_checked_unwrap() };
|
|
||||||
|
|
||||||
accum =
|
|
||||||
// SAFETY:
|
|
||||||
// - The fetched table matches both D and F
|
|
||||||
// - The provided range is equivalent to [0, table.entity_count)
|
|
||||||
// - The if block ensures that the query iteration is dense
|
|
||||||
unsafe { self.fold_over_table_range(accum, &mut func, table, 0..table.entity_count()) };
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for id in self.cursor.storage_id_iter.clone() {
|
|
||||||
// SAFETY: `self.cursor.is_dense` is false, so storage ids are guaranteed to be archetype ids.
|
|
||||||
let archetype_id = unsafe { id.archetype_id };
|
|
||||||
// SAFETY: Matched archetype IDs are guaranteed to still exist.
|
|
||||||
let archetype = unsafe { self.archetypes.get(archetype_id).debug_checked_unwrap() };
|
|
||||||
// SAFETY: Matched table IDs are guaranteed to still exist.
|
|
||||||
let table = unsafe { self.tables.get(archetype.table_id()).debug_checked_unwrap() };
|
|
||||||
|
|
||||||
// When an archetype and its table have equal entity counts, dense iteration can be safely used.
|
|
||||||
// this leverages cache locality to optimize performance.
|
|
||||||
if table.entity_count() == archetype.len() {
|
|
||||||
accum =
|
|
||||||
// SAFETY:
|
|
||||||
// - The fetched archetype matches both D and F
|
|
||||||
// - The provided archetype and its' table have the same length.
|
|
||||||
// - The provided range is equivalent to [0, archetype.len)
|
|
||||||
// - The if block ensures that the query iteration is not dense.
|
|
||||||
unsafe { self.fold_over_dense_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
|
|
||||||
} else {
|
|
||||||
accum =
|
|
||||||
// SAFETY:
|
|
||||||
// - The fetched archetype matches both D and F
|
|
||||||
// - The provided range is equivalent to [0, archetype.len)
|
|
||||||
// - The if block ensures that the query iteration is not dense.
|
|
||||||
unsafe { self.fold_over_archetype_range(accum, &mut func, archetype, 0..archetype.len()) };
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
accum
|
accum
|
||||||
}
|
}
|
||||||
|
|
|
@ -1505,25 +1505,7 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
|
||||||
let mut iter = self.iter_unchecked_manual(world, last_run, this_run);
|
let mut iter = self.iter_unchecked_manual(world, last_run, this_run);
|
||||||
let mut accum = init_accum();
|
let mut accum = init_accum();
|
||||||
for storage_id in queue {
|
for storage_id in queue {
|
||||||
if self.is_dense {
|
accum = iter.fold_over_storage_range(accum, &mut func, storage_id, None);
|
||||||
let id = storage_id.table_id;
|
|
||||||
let table = &world.storages().tables.get(id).debug_checked_unwrap();
|
|
||||||
accum = iter.fold_over_table_range(
|
|
||||||
accum,
|
|
||||||
&mut func,
|
|
||||||
table,
|
|
||||||
0..table.entity_count(),
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
let id = storage_id.archetype_id;
|
|
||||||
let archetype = world.archetypes().get(id).debug_checked_unwrap();
|
|
||||||
accum = iter.fold_over_archetype_range(
|
|
||||||
accum,
|
|
||||||
&mut func,
|
|
||||||
archetype,
|
|
||||||
0..archetype.len(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
@ -1539,17 +1521,8 @@ impl<D: QueryData, F: QueryFilter> QueryState<D, F> {
|
||||||
#[cfg(feature = "trace")]
|
#[cfg(feature = "trace")]
|
||||||
let _span = self.par_iter_span.enter();
|
let _span = self.par_iter_span.enter();
|
||||||
let accum = init_accum();
|
let accum = init_accum();
|
||||||
if self.is_dense {
|
self.iter_unchecked_manual(world, last_run, this_run)
|
||||||
let id = storage_id.table_id;
|
.fold_over_storage_range(accum, &mut func, storage_id, Some(batch));
|
||||||
let table = world.storages().tables.get(id).debug_checked_unwrap();
|
|
||||||
self.iter_unchecked_manual(world, last_run, this_run)
|
|
||||||
.fold_over_table_range(accum, &mut func, table, batch);
|
|
||||||
} else {
|
|
||||||
let id = storage_id.archetype_id;
|
|
||||||
let archetype = world.archetypes().get(id).debug_checked_unwrap();
|
|
||||||
self.iter_unchecked_manual(world, last_run, this_run)
|
|
||||||
.fold_over_archetype_range(accum, &mut func, archetype, batch);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue