mirror of
https://github.com/bevyengine/bevy
synced 2024-11-10 07:04:33 +00:00
Optimize cloning for Access-related structs (#14502)
# Objective Optimize the cloning process for Access-related structs in the ECS system, specifically targeting the `clone_from` method. Previously, profiling showed that 1% of CPU time was spent in `FixedBitSet`'s `drop_in_place`, due to the default `clone_from` implementation: ```rust fn clone_from(&mut self, source: &Self) { *self = source.clone() } ``` This implementation causes unnecessary allocations and deallocations. However, [FixedBitSet provides a more optimized clone_from method](https://github.com/petgraph/fixedbitset/blob/master/src/lib.rs#L1445-L1465) that avoids these allocations and utilizes SIMD instructions for better performance. This PR aims to leverage the optimized clone_from method of FixedBitSet and implement custom clone_from methods for Access-related structs to take full advantage of this optimization. By doing so, we expect to significantly reduce CPU time spent on cloning operations and improve overall system performance. ![image](https://github.com/user-attachments/assets/7526a5c5-c75b-4a9a-b8d2-891f64fd553b) ## Solution - Implemented custom `clone` and `clone_from` methods for `Access`, `FilteredAccess`, `AccessFilters`, and `FilteredAccessSet` structs. - Removed `#[derive(Clone)]` and manually implemented `Clone` trait to use optimized `clone_from` method from `FixedBitSet`. - Added unit tests for cloning and `clone_from` methods to ensure correctness. ## Testing - Conducted performance testing comparing the original and optimized versions. - Measured CPU time consumption for the `clone_from` method: - Original version: 1.34% of CPU time - Optimized version: 0.338% of CPU time - Compared FPS before and after the changes (results may vary depending on the run): Before optimization: ``` 2024-07-28T12:49:11.864019Z INFO bevy diagnostic: fps : 213.489463 (avg 214.502488) 2024-07-28T12:49:11.864037Z INFO bevy diagnostic: frame_time : 4.704746ms (avg 4.682251ms) 2024-07-28T12:49:11.864042Z INFO bevy diagnostic: frame_count: 7947.000000 (avg 7887.500000) ``` ![image](https://github.com/user-attachments/assets/7865a365-0569-4b46-814a-964779d90973) After optimization: ``` 2024-07-28T12:29:42.705738Z INFO bevy diagnostic: fps : 220.273721 (avg 220.912227) 2024-07-28T12:29:42.705762Z INFO bevy diagnostic: frame_time : 4.559127ms (avg 4.544905ms) 2024-07-28T12:29:42.705769Z INFO bevy diagnostic: frame_count: 7596.000000 (avg 7536.500000) ``` ![image](https://github.com/user-attachments/assets/8dd96908-86d0-4850-8e29-f80176a005d6) --- Reviewers can test these changes by running `cargo run --release --example ssr`
This commit is contained in:
parent
7ffd6eade1
commit
455c1bfbe8
1 changed files with 204 additions and 4 deletions
|
@ -47,7 +47,7 @@ impl<'a, T: SparseSetIndex + fmt::Debug> fmt::Debug for FormattedBitSet<'a, T> {
|
|||
///
|
||||
/// Used internally to ensure soundness during system initialization and execution.
|
||||
/// See the [`is_compatible`](Access::is_compatible) and [`get_conflicts`](Access::get_conflicts) functions.
|
||||
#[derive(Clone, Eq, PartialEq)]
|
||||
#[derive(Eq, PartialEq)]
|
||||
pub struct Access<T: SparseSetIndex> {
|
||||
/// All accessed elements.
|
||||
reads_and_writes: FixedBitSet,
|
||||
|
@ -64,6 +64,28 @@ pub struct Access<T: SparseSetIndex> {
|
|||
marker: PhantomData<T>,
|
||||
}
|
||||
|
||||
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
|
||||
impl<T: SparseSetIndex> Clone for Access<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
reads_and_writes: self.reads_and_writes.clone(),
|
||||
writes: self.writes.clone(),
|
||||
reads_all: self.reads_all,
|
||||
writes_all: self.writes_all,
|
||||
archetypal: self.archetypal.clone(),
|
||||
marker: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, source: &Self) {
|
||||
self.reads_and_writes.clone_from(&source.reads_and_writes);
|
||||
self.writes.clone_from(&source.writes);
|
||||
self.reads_all = source.reads_all;
|
||||
self.writes_all = source.writes_all;
|
||||
self.archetypal.clone_from(&source.archetypal);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: SparseSetIndex + fmt::Debug> fmt::Debug for Access<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("Access")
|
||||
|
@ -325,7 +347,7 @@ impl<T: SparseSetIndex> Access<T> {
|
|||
/// - `Query<Option<&T>>` accesses nothing
|
||||
///
|
||||
/// See comments the [`WorldQuery`](super::WorldQuery) impls of [`AnyOf`](super::AnyOf)/`Option`/[`Or`](super::Or) for more information.
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct FilteredAccess<T: SparseSetIndex> {
|
||||
pub(crate) access: Access<T>,
|
||||
pub(crate) required: FixedBitSet,
|
||||
|
@ -334,6 +356,23 @@ pub struct FilteredAccess<T: SparseSetIndex> {
|
|||
pub(crate) filter_sets: Vec<AccessFilters<T>>,
|
||||
}
|
||||
|
||||
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
|
||||
impl<T: SparseSetIndex> Clone for FilteredAccess<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
access: self.access.clone(),
|
||||
required: self.required.clone(),
|
||||
filter_sets: self.filter_sets.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, source: &Self) {
|
||||
self.access.clone_from(&source.access);
|
||||
self.required.clone_from(&source.required);
|
||||
self.filter_sets.clone_from(&source.filter_sets);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: SparseSetIndex> Default for FilteredAccess<T> {
|
||||
fn default() -> Self {
|
||||
Self::matches_everything()
|
||||
|
@ -526,13 +565,29 @@ impl<T: SparseSetIndex> FilteredAccess<T> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Eq, PartialEq)]
|
||||
#[derive(Eq, PartialEq)]
|
||||
pub(crate) struct AccessFilters<T> {
|
||||
pub(crate) with: FixedBitSet,
|
||||
pub(crate) without: FixedBitSet,
|
||||
_index_type: PhantomData<T>,
|
||||
}
|
||||
|
||||
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
|
||||
impl<T: SparseSetIndex> Clone for AccessFilters<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
with: self.with.clone(),
|
||||
without: self.without.clone(),
|
||||
_index_type: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, source: &Self) {
|
||||
self.with.clone_from(&source.with);
|
||||
self.without.clone_from(&source.without);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: SparseSetIndex + fmt::Debug> fmt::Debug for AccessFilters<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("AccessFilters")
|
||||
|
@ -570,12 +625,27 @@ impl<T: SparseSetIndex> AccessFilters<T> {
|
|||
/// It stores multiple sets of accesses.
|
||||
/// - A "combined" set, which is the access of all filters in this set combined.
|
||||
/// - The set of access of each individual filters in this set.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct FilteredAccessSet<T: SparseSetIndex> {
|
||||
combined_access: Access<T>,
|
||||
filtered_accesses: Vec<FilteredAccess<T>>,
|
||||
}
|
||||
|
||||
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
|
||||
impl<T: SparseSetIndex> Clone for FilteredAccessSet<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
combined_access: self.combined_access.clone(),
|
||||
filtered_accesses: self.filtered_accesses.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, source: &Self) {
|
||||
self.combined_access.clone_from(&source.combined_access);
|
||||
self.filtered_accesses.clone_from(&source.filtered_accesses);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: SparseSetIndex> FilteredAccessSet<T> {
|
||||
/// Returns a reference to the unfiltered access of the entire set.
|
||||
#[inline]
|
||||
|
@ -696,6 +766,136 @@ mod tests {
|
|||
use fixedbitset::FixedBitSet;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
fn create_sample_access() -> Access<usize> {
|
||||
let mut access = Access::<usize>::default();
|
||||
|
||||
access.add_read(1);
|
||||
access.add_read(2);
|
||||
access.add_write(3);
|
||||
access.add_archetypal(5);
|
||||
access.read_all();
|
||||
|
||||
access
|
||||
}
|
||||
|
||||
fn create_sample_filtered_access() -> FilteredAccess<usize> {
|
||||
let mut filtered_access = FilteredAccess::<usize>::default();
|
||||
|
||||
filtered_access.add_write(1);
|
||||
filtered_access.add_read(2);
|
||||
filtered_access.add_required(3);
|
||||
filtered_access.and_with(4);
|
||||
|
||||
filtered_access
|
||||
}
|
||||
|
||||
fn create_sample_access_filters() -> AccessFilters<usize> {
|
||||
let mut access_filters = AccessFilters::<usize>::default();
|
||||
|
||||
access_filters.with.grow_and_insert(3);
|
||||
access_filters.without.grow_and_insert(5);
|
||||
|
||||
access_filters
|
||||
}
|
||||
|
||||
fn create_sample_filtered_access_set() -> FilteredAccessSet<usize> {
|
||||
let mut filtered_access_set = FilteredAccessSet::<usize>::default();
|
||||
|
||||
filtered_access_set.add_unfiltered_read(2);
|
||||
filtered_access_set.add_unfiltered_write(4);
|
||||
filtered_access_set.read_all();
|
||||
|
||||
filtered_access_set
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_access_clone() {
|
||||
let original: Access<usize> = create_sample_access();
|
||||
let cloned = original.clone();
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_access_clone_from() {
|
||||
let original: Access<usize> = create_sample_access();
|
||||
let mut cloned = Access::<usize>::default();
|
||||
|
||||
cloned.add_write(7);
|
||||
cloned.add_read(4);
|
||||
cloned.add_archetypal(8);
|
||||
cloned.write_all();
|
||||
|
||||
cloned.clone_from(&original);
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filtered_access_clone() {
|
||||
let original: FilteredAccess<usize> = create_sample_filtered_access();
|
||||
let cloned = original.clone();
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filtered_access_clone_from() {
|
||||
let original: FilteredAccess<usize> = create_sample_filtered_access();
|
||||
let mut cloned = FilteredAccess::<usize>::default();
|
||||
|
||||
cloned.add_write(7);
|
||||
cloned.add_read(4);
|
||||
cloned.append_or(&FilteredAccess::default());
|
||||
|
||||
cloned.clone_from(&original);
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_access_filters_clone() {
|
||||
let original: AccessFilters<usize> = create_sample_access_filters();
|
||||
let cloned = original.clone();
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_access_filters_clone_from() {
|
||||
let original: AccessFilters<usize> = create_sample_access_filters();
|
||||
let mut cloned = AccessFilters::<usize>::default();
|
||||
|
||||
cloned.with.grow_and_insert(1);
|
||||
cloned.without.grow_and_insert(2);
|
||||
|
||||
cloned.clone_from(&original);
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filtered_access_set_clone() {
|
||||
let original: FilteredAccessSet<usize> = create_sample_filtered_access_set();
|
||||
let cloned = original.clone();
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filtered_access_set_from() {
|
||||
let original: FilteredAccessSet<usize> = create_sample_filtered_access_set();
|
||||
let mut cloned = FilteredAccessSet::<usize>::default();
|
||||
|
||||
cloned.add_unfiltered_read(7);
|
||||
cloned.add_unfiltered_write(9);
|
||||
cloned.write_all();
|
||||
|
||||
cloned.clone_from(&original);
|
||||
|
||||
assert_eq!(original, cloned);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn read_all_access_conflicts() {
|
||||
// read_all / single write
|
||||
|
|
Loading…
Reference in a new issue