Fair Change Detection Benchmarking (#11173)

# Objective

- #4972 introduced a benchmark to measure change detection performance.
- However, it uses `iter_batched`, which causes a lot of overhead from
cloning data into each routine closure (it feels like a bug in
`iter_batched`), and it constructs a new query in every iteration. This
overhead masks the real change detection throughput we want to measure.
Instead of evaluating raw change detection, the benchmark ends up
dominated by data cloning and allocation costs.


## Solution

- Use `iter_batched_ref` to reduce the benchmark overhead.
- Use a cached query to better reflect real-world usage scenarios.
- Add more benchmarks.

---

## Changelog
This commit is contained in:
re0312 2024-06-26 20:46:41 +08:00 committed by GitHub
parent 0ae7afbcad
commit f0bdce7425
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,7 +1,8 @@
use bevy_ecs::{
component::Component,
entity::Entity,
prelude::{Added, Changed},
prelude::{Added, Changed, EntityWorldMut, QueryState},
query::QueryFilter,
world::World,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
@ -14,15 +15,28 @@ criterion_group!(
all_changed_detection,
few_changed_detection,
none_changed_detection,
multiple_archetype_none_changed_detection
);
criterion_main!(benches);
/// Adds `1.0` to the first field of every present component in a tuple of options.
///
/// * `$components` - identifier of a tuple binding whose listed fields are `Option`s
///   holding a mutable handle to a value with a numeric `.0` field.
/// * `$index` - comma-separated tuple indices to visit, e.g. `modify!(item;0,1,2)`.
///
/// Fields that are `None` are skipped. Each visited field is moved out of the tuple,
/// so the tuple binding should not be read again afterwards.
macro_rules! modify {
    ($components:ident;$($index:tt),*) => {
        $(
            // `if let` instead of `Option::map`: the closure only existed for its side
            // effect and returned `()`, which is what Clippy's `option_map_unit_fn` flags.
            // `mut` is kept because handles that implement `DerefMut` (rather than being
            // plain `&mut` references) need a mutable binding to be written through.
            if let Some(mut v) = $components.$index {
                v.0 += 1.;
            }
        )*
    };
}
/// Benchmark component wrapping a single `f32`, declared with `Table` storage.
#[derive(Component, Default)]
#[component(storage = "Table")]
struct Table(f32);
/// Benchmark component wrapping a single `f32`, declared with `SparseSet` storage.
#[derive(Component, Default)]
#[component(storage = "SparseSet")]
struct Sparse(f32);
/// Family of `Table`-stored components wrapping an `f32`, distinguished by the const
/// parameter `X`: every distinct `X` is a distinct component type, so attaching
/// different subsets of `Data<X>` to entities gives them different component sets.
#[derive(Component, Default)]
#[component(storage = "Table")]
struct Data<const X: u16>(f32);
trait BenchModify {
fn bench_modify(&mut self) -> f32;
@ -41,7 +55,7 @@ impl BenchModify for Sparse {
}
}
const RANGE_ENTITIES_TO_BENCH_COUNT: std::ops::Range<u32> = 5..7;
const ENTITIES_TO_BENCH_COUNT: &[u32] = &[5000, 50000];
type BenchGroup<'a> = criterion::BenchmarkGroup<'a, criterion::measurement::WallTime>;
@ -55,6 +69,11 @@ fn setup<T: Component + Default>(entity_count: u32) -> World {
black_box(world)
}
// create a cached query in setup to avoid extra costs in each iter
fn generic_filter_query<F: QueryFilter>(world: &mut World) -> QueryState<Entity, F> {
world.query_filtered::<Entity, F>()
}
fn generic_bench<P: Copy>(
bench_group: &mut BenchGroup,
mut benches: Vec<Box<dyn FnMut(&mut BenchGroup, P)>>,
@ -69,11 +88,14 @@ fn all_added_detection_generic<T: Component + Default>(group: &mut BenchGroup, e
group.bench_function(
format!("{}_entities_{}", entity_count, std::any::type_name::<T>()),
|bencher| {
bencher.iter_batched(
|| setup::<T>(entity_count),
|mut world| {
bencher.iter_batched_ref(
|| {
let mut world = setup::<T>(entity_count);
let query = generic_filter_query::<Added<T>>(&mut world);
(world, query)
},
|(ref mut world, ref mut query)| {
let mut count = 0;
let mut query = world.query_filtered::<Entity, Added<T>>();
for entity in query.iter(&world) {
black_box(entity);
count += 1;
@ -90,7 +112,7 @@ fn all_added_detection(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("all_added_detection");
group.warm_up_time(std::time::Duration::from_millis(500));
group.measurement_time(std::time::Duration::from_secs(4));
for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) {
for &entity_count in ENTITIES_TO_BENCH_COUNT {
generic_bench(
&mut group,
vec![
@ -109,7 +131,7 @@ fn all_changed_detection_generic<T: Component + Default + BenchModify>(
group.bench_function(
format!("{}_entities_{}", entity_count, std::any::type_name::<T>()),
|bencher| {
bencher.iter_batched(
bencher.iter_batched_ref(
|| {
let mut world = setup::<T>(entity_count);
world.clear_trackers();
@ -117,11 +139,11 @@ fn all_changed_detection_generic<T: Component + Default + BenchModify>(
for mut component in query.iter_mut(&mut world) {
black_box(component.bench_modify());
}
world
let query = generic_filter_query::<Changed<T>>(&mut world);
(world, query)
},
|mut world| {
|(ref mut world, ref mut query)| {
let mut count = 0;
let mut query = world.query_filtered::<Entity, Changed<T>>();
for entity in query.iter(&world) {
black_box(entity);
count += 1;
@ -138,7 +160,7 @@ fn all_changed_detection(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("all_changed_detection");
group.warm_up_time(std::time::Duration::from_millis(500));
group.measurement_time(std::time::Duration::from_secs(4));
for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) {
for &entity_count in ENTITIES_TO_BENCH_COUNT {
generic_bench(
&mut group,
vec![
@ -159,7 +181,7 @@ fn few_changed_detection_generic<T: Component + Default + BenchModify>(
group.bench_function(
format!("{}_entities_{}", entity_count, std::any::type_name::<T>()),
|bencher| {
bencher.iter_batched(
bencher.iter_batched_ref(
|| {
let mut world = setup::<T>(entity_count);
world.clear_trackers();
@ -170,10 +192,10 @@ fn few_changed_detection_generic<T: Component + Default + BenchModify>(
for component in to_modify[0..amount_to_modify].iter_mut() {
black_box(component.bench_modify());
}
world
let query = generic_filter_query::<Changed<T>>(&mut world);
(world, query)
},
|mut world| {
let mut query = world.query_filtered::<Entity, Changed<T>>();
|(ref mut world, ref mut query)| {
for entity in query.iter(&world) {
black_box(entity);
}
@ -188,7 +210,7 @@ fn few_changed_detection(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("few_changed_detection");
group.warm_up_time(std::time::Duration::from_millis(500));
group.measurement_time(std::time::Duration::from_secs(4));
for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) {
for &entity_count in ENTITIES_TO_BENCH_COUNT {
generic_bench(
&mut group,
vec![
@ -207,15 +229,15 @@ fn none_changed_detection_generic<T: Component + Default>(
group.bench_function(
format!("{}_entities_{}", entity_count, std::any::type_name::<T>()),
|bencher| {
bencher.iter_batched(
bencher.iter_batched_ref(
|| {
let mut world = setup::<T>(entity_count);
world.clear_trackers();
world
let query = generic_filter_query::<Changed<T>>(&mut world);
(world, query)
},
|mut world| {
|(ref mut world, ref mut query)| {
let mut count = 0;
let mut query = world.query_filtered::<Entity, Changed<T>>();
for entity in query.iter(&world) {
black_box(entity);
count += 1;
@ -232,7 +254,7 @@ fn none_changed_detection(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("none_changed_detection");
group.warm_up_time(std::time::Duration::from_millis(500));
group.measurement_time(std::time::Duration::from_secs(4));
for entity_count in RANGE_ENTITIES_TO_BENCH_COUNT.map(|i| i * 10_000) {
for &entity_count in ENTITIES_TO_BENCH_COUNT {
generic_bench(
&mut group,
vec![
@ -243,3 +265,111 @@ fn none_changed_detection(criterion: &mut Criterion) {
);
}
}
/// Inserts `Data::<B>(1.0)` on `entity` when bit `B` of `i` is set; otherwise leaves
/// the entity untouched.
fn insert_if_bit_enabled<const B: u16>(entity: &mut EntityWorldMut, i: u16) {
    // Mask selecting only bit `B`.
    let bit_mask: u16 = 1 << B;
    if (i & bit_mask) == 0 {
        return;
    }
    entity.insert(Data::<B>(1.0));
}
fn add_archetypes_entities<T: Component + Default>(
world: &mut World,
archetype_count: u16,
entity_count: u32,
) {
for i in 0..archetype_count {
for _j in 0..entity_count {
let mut e = world.spawn(T::default());
insert_if_bit_enabled::<0>(&mut e, i);
insert_if_bit_enabled::<1>(&mut e, i);
insert_if_bit_enabled::<2>(&mut e, i);
insert_if_bit_enabled::<3>(&mut e, i);
insert_if_bit_enabled::<4>(&mut e, i);
insert_if_bit_enabled::<5>(&mut e, i);
insert_if_bit_enabled::<6>(&mut e, i);
insert_if_bit_enabled::<7>(&mut e, i);
insert_if_bit_enabled::<8>(&mut e, i);
insert_if_bit_enabled::<9>(&mut e, i);
insert_if_bit_enabled::<10>(&mut e, i);
insert_if_bit_enabled::<11>(&mut e, i);
insert_if_bit_enabled::<12>(&mut e, i);
insert_if_bit_enabled::<13>(&mut e, i);
insert_if_bit_enabled::<14>(&mut e, i);
insert_if_bit_enabled::<15>(&mut e, i);
}
}
}
/// Registers one benchmark that iterates a cached `Changed<T>` query over a world whose
/// entities are spread across several component combinations, none of which has a
/// changed `T`.
///
/// * `group` - criterion group the benchmark is added to.
/// * `archetype_count` - number of distinct indices passed to `add_archetypes_entities`;
///   each index selects a different subset of `Data<N>` components.
/// * `entity_count` - number of entities spawned per index.
///
/// The benchmark id is `"{archetype_count}_archetypes_{entity_count}_entities_{T}"`.
/// The measured routine asserts that the query yields no entity.
fn multiple_archetype_none_changed_detection_generic<T: Component + Default + BenchModify>(
    group: &mut BenchGroup,
    archetype_count: u16,
    entity_count: u32,
) {
    group.bench_function(
        format!(
            "{}_archetypes_{}_entities_{}",
            archetype_count,
            entity_count,
            std::any::type_name::<T>()
        ),
        |bencher| {
            bencher.iter_batched_ref(
                // Setup (not measured): build the world and the cached query.
                || {
                    let mut world = World::new();
                    add_archetypes_entities::<T>(&mut world, archetype_count, entity_count);
                    // Reset change tracking after spawning so the freshly added `T`
                    // components are not reported by the `Changed<T>` query below.
                    world.clear_trackers();
                    let mut query = world.query::<(
                        Option<&mut Data<0>>,
                        Option<&mut Data<1>>,
                        Option<&mut Data<2>>,
                        Option<&mut Data<3>>,
                        Option<&mut Data<4>>,
                        Option<&mut Data<5>>,
                        Option<&mut Data<6>>,
                        Option<&mut Data<7>>,
                        Option<&mut Data<8>>,
                        Option<&mut Data<9>>,
                        Option<&mut Data<10>>,
                        Option<&mut Data<11>>,
                        Option<&mut Data<12>>,
                        Option<&mut Data<13>>,
                        Option<&mut Data<14>>,
                    )>();
                    for components in query.iter_mut(&mut world) {
                        // change Data<X> while keeping T unchanged
                        modify!(components;0,1,2,3,4,5,6,7,8,9,10,11,12,13,14);
                    }
                    let query = generic_filter_query::<Changed<T>>(&mut world);
                    (world, query)
                },
                // Routine (measured): the input arrives as `&mut (World, QueryState)`, so
                // a plain tuple pattern already binds both parts by mutable reference.
                // Explicit `ref mut` is redundant here and is rejected by the Rust 2024
                // edition; `world` coerces to `&World` without an extra borrow.
                |(world, query)| {
                    let mut count = 0;
                    for entity in query.iter(world) {
                        black_box(entity);
                        count += 1;
                    }
                    assert_eq!(0, count);
                },
                criterion::BatchSize::LargeInput,
            )
        },
    );
}
/// Runs the multi-archetype "nothing changed" benchmark for both `Table` and `Sparse`
/// over every combination of the archetype counts and per-archetype entity counts
/// listed below.
fn multiple_archetype_none_changed_detection(criterion: &mut Criterion) {
    const ARCHETYPE_COUNTS: [u16; 3] = [5, 20, 100];
    const ENTITY_COUNTS: [u32; 4] = [10, 100, 1000, 10000];

    let warm_up = std::time::Duration::from_millis(800);
    let measurement = std::time::Duration::from_secs(8);

    let mut group = criterion.benchmark_group("multiple_archetypes_none_changed_detection");
    group.warm_up_time(warm_up);
    group.measurement_time(measurement);

    for archetype_count in ARCHETYPE_COUNTS {
        for entity_count in ENTITY_COUNTS {
            // `Table` first, then `Sparse`, for each parameter combination.
            multiple_archetype_none_changed_detection_generic::<Table>(
                &mut group,
                archetype_count,
                entity_count,
            );
            multiple_archetype_none_changed_detection_generic::<Sparse>(
                &mut group,
                archetype_count,
                entity_count,
            );
        }
    }
}