Optimize extract_clusters and prepare_clusters systems (#10633)

# Objective

When developing my game I realized the `extract_clusters` and
`prepare_clusters` systems were taking a lot of time despite me creating
very few lights. Reducing the number of clusters from the default 4096 to
2048 or fewer greatly improved performance and stabilized FPS (~300 ->
1000+). I debugged it and found out that the main reason for this is
cloning `VisiblePointLights` in the `extract_clusters` system. It contains
light entities grouped by the clusters that they affect. The problem is
that we clone 4096 (assuming the default cluster configuration) vectors
every frame. If many of them happen to be non-empty, it starts to be a
bottleneck because there is a lot of heap allocation. It wouldn't be a
problem if we reused those vectors in the following frames, but we don't.

## Solution

Avoid cloning multiple vectors and instead build a single vector
containing data for all clusters.

I've recorded a trace in the `3d_scene` example with v-sync disabled,
before and after the change.
Mean FPS went from 424 to 990. Mean time for the `extract_clusters` system
was reduced from 210 us to 24 us, and for `prepare_clusters` from 189 us to
87 us.


![image](https://github.com/bevyengine/bevy/assets/160391/ab66aa9d-1fa7-4993-9827-8be76b530972)

---

## Changelog

- Improved performance of the `extract_clusters` and `prepare_clusters`
systems in scenes where lights affect a large part of the scene.
This commit is contained in:
Rafał Harabień 2024-01-29 18:50:22 +01:00 committed by GitHub
parent b17d42dbe9
commit 16ce8c6136
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -267,9 +267,14 @@ pub struct ExtractedClusterConfig {
dimensions: UVec3, dimensions: UVec3,
} }
enum ExtractedClustersPointLightsElement {
ClusterHeader(u32, u32),
LightEntity(Entity),
}
#[derive(Component)] #[derive(Component)]
pub struct ExtractedClustersPointLights { pub struct ExtractedClustersPointLights {
data: Vec<VisiblePointLights>, data: Vec<ExtractedClustersPointLightsElement>,
} }
pub fn extract_clusters( pub fn extract_clusters(
@ -281,10 +286,20 @@ pub fn extract_clusters(
continue; continue;
} }
let num_entities: usize = clusters.lights.iter().map(|l| l.entities.len()).sum();
let mut data = Vec::with_capacity(clusters.lights.len() + num_entities);
for cluster_lights in &clusters.lights {
data.push(ExtractedClustersPointLightsElement::ClusterHeader(
cluster_lights.point_light_count as u32,
cluster_lights.spot_light_count as u32,
));
for l in &cluster_lights.entities {
data.push(ExtractedClustersPointLightsElement::LightEntity(*l));
}
}
commands.get_or_spawn(entity).insert(( commands.get_or_spawn(entity).insert((
ExtractedClustersPointLights { ExtractedClustersPointLights { data },
data: clusters.lights.clone(),
},
ExtractedClusterConfig { ExtractedClusterConfig {
near: clusters.near, near: clusters.near,
far: clusters.far, far: clusters.far,
@ -1524,59 +1539,43 @@ pub fn prepare_clusters(
render_queue: Res<RenderQueue>, render_queue: Res<RenderQueue>,
mesh_pipeline: Res<MeshPipeline>, mesh_pipeline: Res<MeshPipeline>,
global_light_meta: Res<GlobalLightMeta>, global_light_meta: Res<GlobalLightMeta>,
views: Query< views: Query<(Entity, &ExtractedClustersPointLights), With<RenderPhase<Transparent3d>>>,
(
Entity,
&ExtractedClusterConfig,
&ExtractedClustersPointLights,
),
With<RenderPhase<Transparent3d>>,
>,
) { ) {
let render_device = render_device.into_inner(); let render_device = render_device.into_inner();
let supports_storage_buffers = matches!( let supports_storage_buffers = matches!(
mesh_pipeline.clustered_forward_buffer_binding_type, mesh_pipeline.clustered_forward_buffer_binding_type,
BufferBindingType::Storage { .. } BufferBindingType::Storage { .. }
); );
for (entity, cluster_config, extracted_clusters) in &views { for (entity, extracted_clusters) in &views {
let mut view_clusters_bindings = let mut view_clusters_bindings =
ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type); ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type);
view_clusters_bindings.clear(); view_clusters_bindings.clear();
let mut indices_full = false; for record in &extracted_clusters.data {
match record {
let mut cluster_index = 0; ExtractedClustersPointLightsElement::ClusterHeader(
for _y in 0..cluster_config.dimensions.y { point_light_count,
for _x in 0..cluster_config.dimensions.x { spot_light_count,
for _z in 0..cluster_config.dimensions.z { ) => {
let offset = view_clusters_bindings.n_indices(); let offset = view_clusters_bindings.n_indices();
let cluster_lights = &extracted_clusters.data[cluster_index];
view_clusters_bindings.push_offset_and_counts( view_clusters_bindings.push_offset_and_counts(
offset, offset,
cluster_lights.point_light_count, *point_light_count as usize,
cluster_lights.spot_light_count, *spot_light_count as usize,
); );
}
if !indices_full { ExtractedClustersPointLightsElement::LightEntity(entity) => {
for entity in cluster_lights.iter() { if let Some(light_index) = global_light_meta.entity_to_index.get(entity) {
if let Some(light_index) = global_light_meta.entity_to_index.get(entity) if view_clusters_bindings.n_indices() >= ViewClusterBindings::MAX_INDICES
{
if view_clusters_bindings.n_indices()
>= ViewClusterBindings::MAX_INDICES
&& !supports_storage_buffers && !supports_storage_buffers
{ {
warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters."); warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters.");
indices_full = true;
break; break;
} }
view_clusters_bindings.push_index(*light_index); view_clusters_bindings.push_index(*light_index);
} }
} }
} }
cluster_index += 1;
}
}
} }
view_clusters_bindings.write_buffers(render_device, &render_queue); view_clusters_bindings.write_buffers(render_device, &render_queue);