Fix 2D BatchedInstanceBuffer clear (#12922)

# Objective

- `cargo run --release --example bevymark -- --benchmark --waves 160
--per-wave 1000 --mode mesh2d` gets progressively slower over time:
`no_gpu_preprocessing::write_batched_instance_buffer<bevy_sprite::mesh2d::mesh::Mesh2dPipeline>`
takes longer and longer to run because the `BatchedInstanceBuffer` is
never cleared.

## Solution

- Split the `clear_batched_instance_buffers` system into CPU and GPU
versions
- Use the CPU version for 2D meshes
This commit is contained in:
Robert Swain 2024-04-15 07:00:43 +02:00 committed by GitHub
parent 62f2a73cac
commit 5f05e75a70
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 49 additions and 31 deletions

View file

@ -15,8 +15,8 @@ use bevy_ecs::{
use bevy_math::{Affine3, Rect, UVec2, Vec3, Vec4};
use bevy_render::{
batching::{
clear_batched_instance_buffers, gpu_preprocessing, no_gpu_preprocessing, GetBatchData,
GetFullBatchData, NoAutomaticBatching,
gpu_preprocessing, no_gpu_preprocessing, GetBatchData, GetFullBatchData,
NoAutomaticBatching,
},
mesh::*,
render_asset::RenderAssets,
@ -139,10 +139,14 @@ impl Plugin for MeshRenderPlugin {
.init_resource::<SkinIndices>()
.init_resource::<MorphUniform>()
.init_resource::<MorphIndices>()
.add_systems(ExtractSchedule, (extract_skins, extract_morphs))
.add_systems(
ExtractSchedule,
clear_batched_instance_buffers::<MeshPipeline>.before(ExtractMeshesSet),
(
extract_skins,
extract_morphs,
gpu_preprocessing::clear_batched_gpu_instance_buffers::<MeshPipeline>
.before(ExtractMeshesSet),
),
)
.add_systems(
Render,
@ -151,6 +155,9 @@ impl Plugin for MeshRenderPlugin {
prepare_morphs.in_set(RenderSet::PrepareResources),
prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups),
prepare_mesh_view_bind_groups.in_set(RenderSet::PrepareBindGroups),
no_gpu_preprocessing::clear_batched_cpu_instance_buffers::<MeshPipeline>
.in_set(RenderSet::Cleanup)
.after(RenderSet::Render),
),
);
}

View file

@ -125,10 +125,28 @@ where
}
}
/// Extraction-time system that empties the [`BatchedInstanceBuffers`] resource
/// so that it can be refilled for the current frame.
///
/// The clear has to happen early in extraction: when GPU preprocessing is in
/// use, the extraction phase itself writes directly into the mesh input
/// uniform buffers, so they must already be empty by that point.
///
/// Does nothing if the resource is not present.
pub fn clear_batched_gpu_instance_buffers<GFBD>(
    gpu_batched_instance_buffers: Option<
        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    >,
) where
    GFBD: GetFullBatchData,
{
    // The resource is optional; there is nothing to clear when it is absent.
    let Some(mut buffers) = gpu_batched_instance_buffers else {
        return;
    };
    buffers.clear();
}
/// A system that removes GPU preprocessing work item buffers that correspond to
/// deleted [`ViewTarget`]s.
///
/// This is a separate system from [`super::clear_batched_instance_buffers`]
/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
/// because [`ViewTarget`]s aren't created until after the extraction phase is
/// completed.
pub fn delete_old_work_item_buffers<GFBD>(

View file

@ -1,7 +1,7 @@
use bevy_ecs::{
component::Component,
entity::Entity,
system::{Query, ResMut, SystemParam, SystemParamItem},
system::{Query, SystemParam, SystemParamItem},
};
use bytemuck::Pod;
use nonmax::NonMaxU32;
@ -135,30 +135,6 @@ pub trait GetFullBatchData: GetBatchData {
) -> Option<NonMaxU32>;
}
/// Extraction-time system that empties every batched instance buffer for the
/// frame: both the [`no_gpu_preprocessing::BatchedInstanceBuffer`] and the
/// [`gpu_preprocessing::BatchedInstanceBuffers`], whichever of them exist.
///
/// The clear has to happen early in extraction: when GPU preprocessing is in
/// use, the extraction phase itself writes directly into the mesh input
/// uniform buffers, so they must already be empty by that point.
pub fn clear_batched_instance_buffers<GFBD>(
    cpu_batched_instance_buffer: Option<
        ResMut<no_gpu_preprocessing::BatchedInstanceBuffer<GFBD::BufferData>>,
    >,
    gpu_batched_instance_buffers: Option<
        ResMut<gpu_preprocessing::BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    >,
) where
    GFBD: GetFullBatchData,
{
    // CPU path: clear the single instance buffer if the resource exists.
    if let Some(mut cpu_buffer) = cpu_batched_instance_buffer {
        cpu_buffer.clear();
    }

    // GPU path: this is the last step, so simply stop when the resource is absent.
    let Some(mut gpu_buffers) = gpu_batched_instance_buffers else {
        return;
    };
    gpu_buffers.clear();
}
/// Sorts a render phase that uses bins.
pub fn sort_binned_render_phase<BPI>(mut views: Query<&mut BinnedRenderPhase<BPI>>)
where

View file

@ -43,6 +43,19 @@ where
}
}
/// System that empties the [`BatchedInstanceBuffer`] so that it can be
/// refilled for the frame.
///
/// It must have run before the CPU batched instance buffer is next used.
/// Does nothing if the resource is not present.
pub fn clear_batched_cpu_instance_buffers<GBD>(
    cpu_batched_instance_buffer: Option<ResMut<BatchedInstanceBuffer<GBD::BufferData>>>,
) where
    GBD: GetBatchData,
{
    // The resource is optional; there is nothing to clear when it is absent.
    let Some(mut buffer) = cpu_batched_instance_buffer else {
        return;
    };
    buffer.clear();
}
/// Batch the items in a sorted render phase, when GPU instance buffer building
/// isn't in use. This means comparing metadata needed to draw each phase item
/// and trying to combine the draws into a batch.

View file

@ -12,7 +12,8 @@ use bevy_ecs::{
use bevy_math::{Affine3, Vec4};
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use bevy_render::batching::no_gpu_preprocessing::{
batch_and_prepare_sorted_render_phase, write_batched_instance_buffer, BatchedInstanceBuffer,
self, batch_and_prepare_sorted_render_phase, write_batched_instance_buffer,
BatchedInstanceBuffer,
};
use bevy_render::mesh::{GpuMesh, MeshVertexBufferLayoutRef};
use bevy_render::{
@ -107,6 +108,9 @@ impl Plugin for Mesh2dRenderPlugin {
.in_set(RenderSet::PrepareResourcesFlush),
prepare_mesh2d_bind_group.in_set(RenderSet::PrepareBindGroups),
prepare_mesh2d_view_bind_groups.in_set(RenderSet::PrepareBindGroups),
no_gpu_preprocessing::clear_batched_cpu_instance_buffers::<Mesh2dPipeline>
.in_set(RenderSet::Cleanup)
.after(RenderSet::Render),
),
);
}