Optimize cloning for Access-related structs (#14502)

# Objective

Optimize the cloning process for Access-related structs in the ECS
system, specifically targeting the `clone_from` method.

Previously, profiling showed that 1% of CPU time was spent in
`FixedBitSet`'s `drop_in_place`, due to the default `clone_from`
implementation:

```rust
fn clone_from(&mut self, source: &Self) {
    *self = source.clone()
}
```

This implementation causes unnecessary allocations and deallocations.
However, [FixedBitSet provides a more optimized clone_from
method](https://github.com/petgraph/fixedbitset/blob/master/src/lib.rs#L1445-L1465)
that avoids these allocations and utilizes SIMD instructions for better
performance.

This PR aims to leverage the optimized clone_from method of FixedBitSet
and implement custom clone_from methods for Access-related structs to
take full advantage of this optimization. By doing so, we expect to
significantly reduce CPU time spent on cloning operations and improve
overall system performance.



![image](https://github.com/user-attachments/assets/7526a5c5-c75b-4a9a-b8d2-891f64fd553b)


## Solution

- Implemented custom `clone` and `clone_from` methods for `Access`,
`FilteredAccess`, `AccessFilters`, and `FilteredAccessSet` structs.
- Removed `#[derive(Clone)]` and manually implemented `Clone` trait to
use optimized `clone_from` method from `FixedBitSet`.
- Added unit tests for cloning and `clone_from` methods to ensure
correctness.

## Testing

- Conducted performance testing comparing the original and optimized
versions.
- Measured CPU time consumption for the `clone_from` method:
  - Original version: 1.34% of CPU time
  - Optimized version: 0.338% of CPU time
- Compared FPS before and after the changes (results may vary depending
on the run):

Before optimization:
```
2024-07-28T12:49:11.864019Z  INFO bevy diagnostic: fps        :  213.489463   (avg 214.502488)
2024-07-28T12:49:11.864037Z  INFO bevy diagnostic: frame_time :    4.704746ms (avg 4.682251ms)
2024-07-28T12:49:11.864042Z  INFO bevy diagnostic: frame_count: 7947.000000   (avg 7887.500000)
```


![image](https://github.com/user-attachments/assets/7865a365-0569-4b46-814a-964779d90973)

After optimization:
```
2024-07-28T12:29:42.705738Z  INFO bevy diagnostic: fps        :  220.273721   (avg 220.912227)
2024-07-28T12:29:42.705762Z  INFO bevy diagnostic: frame_time :    4.559127ms (avg 4.544905ms)
2024-07-28T12:29:42.705769Z  INFO bevy diagnostic: frame_count: 7596.000000   (avg 7536.500000)
```


![image](https://github.com/user-attachments/assets/8dd96908-86d0-4850-8e29-f80176a005d6)

---

Reviewers can test these changes by running `cargo run --release
--example ssr`
This commit is contained in:
Rostyslav Toch 2024-07-30 00:48:21 +01:00 committed by GitHub
parent 7ffd6eade1
commit 455c1bfbe8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -47,7 +47,7 @@ impl<'a, T: SparseSetIndex + fmt::Debug> fmt::Debug for FormattedBitSet<'a, T> {
///
/// Used internally to ensure soundness during system initialization and execution.
/// See the [`is_compatible`](Access::is_compatible) and [`get_conflicts`](Access::get_conflicts) functions.
#[derive(Clone, Eq, PartialEq)]
#[derive(Eq, PartialEq)]
pub struct Access<T: SparseSetIndex> {
/// All accessed elements.
reads_and_writes: FixedBitSet,
@ -64,6 +64,28 @@ pub struct Access<T: SparseSetIndex> {
marker: PhantomData<T>,
}
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
impl<T: SparseSetIndex> Clone for Access<T> {
fn clone(&self) -> Self {
Self {
reads_and_writes: self.reads_and_writes.clone(),
writes: self.writes.clone(),
reads_all: self.reads_all,
writes_all: self.writes_all,
archetypal: self.archetypal.clone(),
marker: PhantomData,
}
}
fn clone_from(&mut self, source: &Self) {
self.reads_and_writes.clone_from(&source.reads_and_writes);
self.writes.clone_from(&source.writes);
self.reads_all = source.reads_all;
self.writes_all = source.writes_all;
self.archetypal.clone_from(&source.archetypal);
}
}
impl<T: SparseSetIndex + fmt::Debug> fmt::Debug for Access<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Access")
@ -325,7 +347,7 @@ impl<T: SparseSetIndex> Access<T> {
/// - `Query<Option<&T>>` accesses nothing
///
/// See comments the [`WorldQuery`](super::WorldQuery) impls of [`AnyOf`](super::AnyOf)/`Option`/[`Or`](super::Or) for more information.
#[derive(Debug, Clone, Eq, PartialEq)]
#[derive(Debug, Eq, PartialEq)]
pub struct FilteredAccess<T: SparseSetIndex> {
pub(crate) access: Access<T>,
pub(crate) required: FixedBitSet,
@ -334,6 +356,23 @@ pub struct FilteredAccess<T: SparseSetIndex> {
pub(crate) filter_sets: Vec<AccessFilters<T>>,
}
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
impl<T: SparseSetIndex> Clone for FilteredAccess<T> {
fn clone(&self) -> Self {
Self {
access: self.access.clone(),
required: self.required.clone(),
filter_sets: self.filter_sets.clone(),
}
}
fn clone_from(&mut self, source: &Self) {
self.access.clone_from(&source.access);
self.required.clone_from(&source.required);
self.filter_sets.clone_from(&source.filter_sets);
}
}
impl<T: SparseSetIndex> Default for FilteredAccess<T> {
fn default() -> Self {
Self::matches_everything()
@ -526,13 +565,29 @@ impl<T: SparseSetIndex> FilteredAccess<T> {
}
}
#[derive(Clone, Eq, PartialEq)]
#[derive(Eq, PartialEq)]
pub(crate) struct AccessFilters<T> {
pub(crate) with: FixedBitSet,
pub(crate) without: FixedBitSet,
_index_type: PhantomData<T>,
}
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
impl<T: SparseSetIndex> Clone for AccessFilters<T> {
fn clone(&self) -> Self {
Self {
with: self.with.clone(),
without: self.without.clone(),
_index_type: PhantomData,
}
}
fn clone_from(&mut self, source: &Self) {
self.with.clone_from(&source.with);
self.without.clone_from(&source.without);
}
}
impl<T: SparseSetIndex + fmt::Debug> fmt::Debug for AccessFilters<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("AccessFilters")
@ -570,12 +625,27 @@ impl<T: SparseSetIndex> AccessFilters<T> {
/// It stores multiple sets of accesses.
/// - A "combined" set, which is the access of all filters in this set combined.
/// - The set of access of each individual filters in this set.
#[derive(Debug, Clone)]
#[derive(Debug, PartialEq, Eq)]
pub struct FilteredAccessSet<T: SparseSetIndex> {
combined_access: Access<T>,
filtered_accesses: Vec<FilteredAccess<T>>,
}
// This is needed since `#[derive(Clone)]` does not generate optimized `clone_from`.
impl<T: SparseSetIndex> Clone for FilteredAccessSet<T> {
fn clone(&self) -> Self {
Self {
combined_access: self.combined_access.clone(),
filtered_accesses: self.filtered_accesses.clone(),
}
}
fn clone_from(&mut self, source: &Self) {
self.combined_access.clone_from(&source.combined_access);
self.filtered_accesses.clone_from(&source.filtered_accesses);
}
}
impl<T: SparseSetIndex> FilteredAccessSet<T> {
/// Returns a reference to the unfiltered access of the entire set.
#[inline]
@ -696,6 +766,136 @@ mod tests {
use fixedbitset::FixedBitSet;
use std::marker::PhantomData;
fn create_sample_access() -> Access<usize> {
let mut access = Access::<usize>::default();
access.add_read(1);
access.add_read(2);
access.add_write(3);
access.add_archetypal(5);
access.read_all();
access
}
fn create_sample_filtered_access() -> FilteredAccess<usize> {
let mut filtered_access = FilteredAccess::<usize>::default();
filtered_access.add_write(1);
filtered_access.add_read(2);
filtered_access.add_required(3);
filtered_access.and_with(4);
filtered_access
}
fn create_sample_access_filters() -> AccessFilters<usize> {
let mut access_filters = AccessFilters::<usize>::default();
access_filters.with.grow_and_insert(3);
access_filters.without.grow_and_insert(5);
access_filters
}
fn create_sample_filtered_access_set() -> FilteredAccessSet<usize> {
let mut filtered_access_set = FilteredAccessSet::<usize>::default();
filtered_access_set.add_unfiltered_read(2);
filtered_access_set.add_unfiltered_write(4);
filtered_access_set.read_all();
filtered_access_set
}
#[test]
fn test_access_clone() {
let original: Access<usize> = create_sample_access();
let cloned = original.clone();
assert_eq!(original, cloned);
}
#[test]
fn test_access_clone_from() {
let original: Access<usize> = create_sample_access();
let mut cloned = Access::<usize>::default();
cloned.add_write(7);
cloned.add_read(4);
cloned.add_archetypal(8);
cloned.write_all();
cloned.clone_from(&original);
assert_eq!(original, cloned);
}
#[test]
fn test_filtered_access_clone() {
let original: FilteredAccess<usize> = create_sample_filtered_access();
let cloned = original.clone();
assert_eq!(original, cloned);
}
#[test]
fn test_filtered_access_clone_from() {
let original: FilteredAccess<usize> = create_sample_filtered_access();
let mut cloned = FilteredAccess::<usize>::default();
cloned.add_write(7);
cloned.add_read(4);
cloned.append_or(&FilteredAccess::default());
cloned.clone_from(&original);
assert_eq!(original, cloned);
}
#[test]
fn test_access_filters_clone() {
let original: AccessFilters<usize> = create_sample_access_filters();
let cloned = original.clone();
assert_eq!(original, cloned);
}
#[test]
fn test_access_filters_clone_from() {
let original: AccessFilters<usize> = create_sample_access_filters();
let mut cloned = AccessFilters::<usize>::default();
cloned.with.grow_and_insert(1);
cloned.without.grow_and_insert(2);
cloned.clone_from(&original);
assert_eq!(original, cloned);
}
#[test]
fn test_filtered_access_set_clone() {
let original: FilteredAccessSet<usize> = create_sample_filtered_access_set();
let cloned = original.clone();
assert_eq!(original, cloned);
}
#[test]
fn test_filtered_access_set_from() {
let original: FilteredAccessSet<usize> = create_sample_filtered_access_set();
let mut cloned = FilteredAccessSet::<usize>::default();
cloned.add_unfiltered_read(7);
cloned.add_unfiltered_write(9);
cloned.write_all();
cloned.clone_from(&original);
assert_eq!(original, cloned);
}
#[test]
fn read_all_access_conflicts() {
// read_all / single write