From 9d6a4fbc8545674d916ebe78f32b219846a6737b Mon Sep 17 00:00:00 2001 From: IceSentry Date: Mon, 12 Aug 2024 11:38:24 -0400 Subject: [PATCH] Use BinnedRenderPhase for Opaque2d (#13091) Based on top of #12982 and #13069 # Objective - Opaque2d was implemented with SortedRenderPhase but BinnedRenderPhase should be much faster ## Solution - Implement BinnedRenderPhase for Opaque2d ## Notes While testing this PR, before the change I had ~14 fps in bevymark with 100k entities. After this change I get ~71 fps, compared to using sprites where I only get ~63 fps. This means that after this PR mesh2d with opaque meshes will be faster than the sprite path. This is not a 1 to 1 comparison since sprites do alpha blending. --- .../src/core_2d/main_opaque_pass_2d_node.rs | 8 +- crates/bevy_core_pipeline/src/core_2d/mod.rs | 89 +++++++++++------- crates/bevy_sprite/Cargo.toml | 3 +- crates/bevy_sprite/src/mesh2d/material.rs | 28 +++--- crates/bevy_sprite/src/mesh2d/mesh.rs | 92 ++++++++++++++++++- 5 files changed, 167 insertions(+), 53 deletions(-) diff --git a/crates/bevy_core_pipeline/src/core_2d/main_opaque_pass_2d_node.rs b/crates/bevy_core_pipeline/src/core_2d/main_opaque_pass_2d_node.rs index 3001340d47..0d3d532e27 100644 --- a/crates/bevy_core_pipeline/src/core_2d/main_opaque_pass_2d_node.rs +++ b/crates/bevy_core_pipeline/src/core_2d/main_opaque_pass_2d_node.rs @@ -4,7 +4,7 @@ use bevy_render::{ camera::ExtractedCamera, diagnostic::RecordDiagnostics, render_graph::{NodeRunError, RenderGraphContext, ViewNode}, - render_phase::{TrackedRenderPass, ViewSortedRenderPhases}, + render_phase::{TrackedRenderPass, ViewBinnedRenderPhases}, render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp}, renderer::RenderContext, view::{ViewDepthTexture, ViewTarget}, @@ -13,7 +13,7 @@ use bevy_utils::tracing::error; #[cfg(feature = "trace")] use bevy_utils::tracing::info_span; -/// A [`bevy_render::render_graph::Node`] that runs the [`Opaque2d`] [`ViewSortedRenderPhases`] +/// A [`bevy_render::render_graph::Node`] that runs the [`Opaque2d`] [`ViewBinnedRenderPhases`] #[derive(Default)] pub struct MainOpaquePass2dNode; impl ViewNode for MainOpaquePass2dNode { @@ -30,7 +30,7 @@ impl ViewNode for MainOpaquePass2dNode { (camera, target, depth): QueryItem<'w, Self::ViewQuery>, world: &'w World, ) -> Result<(), NodeRunError> { - let Some(opaque_phases) = world.get_resource::>() else { + let Some(opaque_phases) = world.get_resource::>() else { return Ok(()); }; @@ -69,7 +69,7 @@ impl ViewNode for MainOpaquePass2dNode { } // Opaque draws - if !opaque_phase.items.is_empty() { + if !opaque_phase.is_empty() { #[cfg(feature = "trace")] let _opaque_main_pass_2d_span = info_span!("opaque_main_pass_2d").entered(); if let Err(err) = opaque_phase.render(&mut render_pass, world, view_entity) { diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs index e479e45853..06aa8b5b7f 100644 --- a/crates/bevy_core_pipeline/src/core_2d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs @@ -32,6 +32,7 @@ pub mod graph { use std::ops::Range; +use bevy_asset::UntypedAssetId; use bevy_utils::HashMap; pub use camera_2d::*; pub use main_opaque_pass_2d_node::*; @@ -45,12 +46,13 @@ use bevy_render::{ extract_component::ExtractComponentPlugin, render_graph::{EmptyNode, RenderGraphApp, ViewNodeRunner}, render_phase::{ - sort_phase_system, CachedRenderPipelinePhaseItem, DrawFunctionId, DrawFunctions, PhaseItem, - PhaseItemExtraIndex, SortedPhaseItem, ViewSortedRenderPhases, + sort_phase_system, BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, + DrawFunctions, PhaseItem, PhaseItemExtraIndex, SortedPhaseItem, ViewBinnedRenderPhases, + ViewSortedRenderPhases, }, render_resource::{ - CachedRenderPipelineId, Extent3d, TextureDescriptor, TextureDimension, TextureFormat, - TextureUsages, + BindGroupId, CachedRenderPipelineId, Extent3d, TextureDescriptor, TextureDimension, + TextureFormat, TextureUsages, }, renderer::RenderDevice, texture::TextureCache, @@ -78,12 +80,11 @@ impl Plugin for Core2dPlugin { .init_resource::>() .init_resource::>() .init_resource::>() - .init_resource::>() + .init_resource::>() .add_systems(ExtractSchedule, extract_core_2d_camera_phases) .add_systems( Render, ( - sort_phase_system::.in_set(RenderSet::PhaseSort), sort_phase_system::.in_set(RenderSet::PhaseSort), prepare_core_2d_depth_textures.in_set(RenderSet::PrepareResources), ), @@ -119,24 +120,47 @@ impl Plugin for Core2dPlugin { } } -/// Opaque 2D [`SortedPhaseItem`]s. +/// Opaque 2D [`BinnedPhaseItem`]s. pub struct Opaque2d { - pub sort_key: FloatOrd, - pub entity: Entity, - pub pipeline: CachedRenderPipelineId, - pub draw_function: DrawFunctionId, + /// The key, which determines which can be batched. + pub key: Opaque2dBinKey, + /// An entity from which data will be fetched, including the mesh if + /// applicable. + pub representative_entity: Entity, + /// The ranges of instances. pub batch_range: Range, + /// An extra index, which is either a dynamic offset or an index in the + /// indirect parameters list. pub extra_index: PhaseItemExtraIndex, } + +/// Data that must be identical in order to batch phase items together. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Opaque2dBinKey { + /// The identifier of the render pipeline. + pub pipeline: CachedRenderPipelineId, + /// The function used to draw. + pub draw_function: DrawFunctionId, + /// The asset that this phase item is associated with. + /// + /// Normally, this is the ID of the mesh, but for non-mesh items it might be + /// the ID of another type of asset. + pub asset_id: UntypedAssetId, + /// The ID of a bind group specific to the material. + /// + /// In the case of PBR, this is the `MaterialBindGroupId`. + pub material_bind_group_id: Option, +} + impl PhaseItem for Opaque2d { #[inline] fn entity(&self) -> Entity { - self.entity + self.representative_entity } #[inline] fn draw_function(&self) -> DrawFunctionId { - self.draw_function + self.key.draw_function } #[inline] @@ -158,25 +182,28 @@ impl PhaseItem for Opaque2d { } } -impl SortedPhaseItem for Opaque2d { - type SortKey = FloatOrd; +impl BinnedPhaseItem for Opaque2d { + type BinKey = Opaque2dBinKey; - #[inline] - fn sort_key(&self) -> Self::SortKey { - self.sort_key - } - - #[inline] - fn sort(items: &mut [Self]) { - // radsort is a stable radix sort that performed better than `slice::sort_by_key` or `slice::sort_unstable_by_key`. - radsort::sort_by_key(items, |item| item.sort_key().0); + fn new( + key: Self::BinKey, + representative_entity: Entity, + batch_range: Range, + extra_index: PhaseItemExtraIndex, + ) -> Self { + Opaque2d { + key, + representative_entity, + batch_range, + extra_index, + } } } impl CachedRenderPipelinePhaseItem for Opaque2d { #[inline] fn cached_pipeline(&self) -> CachedRenderPipelineId { - self.pipeline + self.key.pipeline } } @@ -246,7 +273,7 @@ impl CachedRenderPipelinePhaseItem for Transparent2d { pub fn extract_core_2d_camera_phases( mut commands: Commands, mut transparent_2d_phases: ResMut>, - mut opaque_2d_phases: ResMut>, + mut opaque_2d_phases: ResMut>, cameras_2d: Extract>>, mut live_entities: Local, ) { @@ -273,13 +300,13 @@ pub fn prepare_core_2d_depth_textures( mut commands: Commands, mut texture_cache: ResMut, render_device: Res, - transparent_2d_phases: ResMut>, - opaque_2d_phases: ResMut>, + transparent_2d_phases: Res>, + opaque_2d_phases: Res>, views_2d: Query<(Entity, &ExtractedCamera, &Msaa), (With,)>, ) { let mut textures = HashMap::default(); - for (entity, camera, msaa) in &views_2d { - if !opaque_2d_phases.contains_key(&entity) || !transparent_2d_phases.contains_key(&entity) { + for (view, camera, msaa) in &views_2d { + if !opaque_2d_phases.contains_key(&view) || !transparent_2d_phases.contains_key(&view) { continue; }; @@ -313,7 +340,7 @@ pub fn prepare_core_2d_depth_textures( .clone(); commands - .entity(entity) + .entity(view) .insert(ViewDepthTexture::new(cached_texture, Some(0.0))); } } diff --git a/crates/bevy_sprite/Cargo.toml b/crates/bevy_sprite/Cargo.toml index 93cf7771d3..367e7bb68c 100644 --- a/crates/bevy_sprite/Cargo.toml +++ b/crates/bevy_sprite/Cargo.toml @@ -29,13 +29,14 @@ bevy_utils = { path = "../bevy_utils", version = "0.15.0-dev" } bevy_derive = { path = "../bevy_derive", version = "0.15.0-dev" } # other -bytemuck = { version = "1.5", features = ["derive"] } +bytemuck = { version = "1", features = ["derive", "must_cast"] } fixedbitset = "0.5" guillotiere = "0.6.0" thiserror = "1.0" rectangle-pack = "0.4" bitflags = "2.3" radsort = "0.1" +nonmax = "0.5" [lints] workspace = true diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index 801c4b722f..64d1755d13 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -1,7 +1,7 @@ use bevy_app::{App, Plugin}; use bevy_asset::{Asset, AssetApp, AssetId, AssetServer, Handle}; use bevy_core_pipeline::{ - core_2d::{Opaque2d, Transparent2d}, + core_2d::{Opaque2d, Opaque2dBinKey, Transparent2d}, tonemapping::{DebandDither, Tonemapping}, }; use bevy_derive::{Deref, DerefMut}; @@ -18,8 +18,9 @@ use bevy_render::{ prepare_assets, PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets, }, render_phase::{ - AddRenderCommand, DrawFunctions, PhaseItem, PhaseItemExtraIndex, RenderCommand, - RenderCommandResult, SetItemPipeline, TrackedRenderPass, ViewSortedRenderPhases, + AddRenderCommand, BinnedRenderPhaseType, DrawFunctions, PhaseItem, PhaseItemExtraIndex, + RenderCommand, RenderCommandResult, SetItemPipeline, TrackedRenderPass, + ViewBinnedRenderPhases, ViewSortedRenderPhases, }, render_resource::{ AsBindGroup, AsBindGroupError, BindGroup, BindGroupId, BindGroupLayout, @@ -404,7 +405,7 @@ pub fn queue_material2d_meshes( mut render_mesh_instances: ResMut, render_material_instances: Res>, mut transparent_render_phases: ResMut>, - mut opaque_render_phases: ResMut>, + mut opaque_render_phases: ResMut>, mut views: Query<( Entity, &ExtractedView, @@ -484,16 +485,17 @@ pub fn queue_material2d_meshes( match material_2d.properties.alpha_mode { AlphaMode2d::Opaque => { - opaque_phase.add(Opaque2d { - entity: *visible_entity, - draw_function: draw_opaque_2d, + let bin_key = Opaque2dBinKey { pipeline: pipeline_id, - // Front-to-back ordering - sort_key: -FloatOrd(mesh_z + material_2d.properties.depth_bias), - // Batching is done in batch_and_prepare_render_phase - batch_range: 0..1, - extra_index: PhaseItemExtraIndex::NONE, - }); + draw_function: draw_opaque_2d, + asset_id: mesh_instance.mesh_asset_id.into(), + material_bind_group_id: material_2d.get_bind_group_id().0, + }; + opaque_phase.add( + bin_key, + *visible_entity, + BinnedRenderPhaseType::mesh(mesh_instance.automatic_batching), + ); } AlphaMode2d::Blend => { transparent_phase.add(Transparent2d { diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index d21ad1fc60..1f4f33510c 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -14,10 +14,13 @@ use bevy_ecs::{ }; use bevy_math::{Affine3, Vec4}; use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use bevy_render::batching::gpu_preprocessing::IndirectParameters; +use bevy_render::batching::no_gpu_preprocessing::batch_and_prepare_binned_render_phase; use bevy_render::batching::no_gpu_preprocessing::{ self, batch_and_prepare_sorted_render_phase, write_batched_instance_buffer, BatchedInstanceBuffer, }; +use bevy_render::batching::GetFullBatchData; use bevy_render::mesh::allocator::MeshAllocator; use bevy_render::mesh::{MeshVertexBufferLayoutRef, RenderMesh}; use bevy_render::texture::FallbackImage; @@ -38,6 +41,8 @@ use bevy_render::{ Extract, ExtractSchedule, Render, RenderApp, RenderSet, }; use bevy_transform::components::GlobalTransform; +use bevy_utils::tracing::error; +use nonmax::NonMaxU32; use crate::Material2dBindGroupId; @@ -107,7 +112,7 @@ impl Plugin for Mesh2dRenderPlugin { .add_systems( Render, ( - batch_and_prepare_sorted_render_phase:: + batch_and_prepare_binned_render_phase:: .in_set(RenderSet::PrepareResources), batch_and_prepare_sorted_render_phase:: .in_set(RenderSet::PrepareResources), @@ -163,7 +168,7 @@ pub struct Mesh2dTransforms { pub flags: u32, } -#[derive(ShaderType, Clone)] +#[derive(ShaderType, Clone, Copy)] pub struct Mesh2dUniform { // Affine 4x3 matrix transposed to 3x4 pub world_from_local: [Vec4; 3], @@ -360,12 +365,16 @@ impl Mesh2dPipeline { } impl GetBatchData for Mesh2dPipeline { - type Param = SRes; + type Param = ( + SRes, + SRes>, + SRes, + ); type CompareData = (Material2dBindGroupId, AssetId); type BufferData = Mesh2dUniform; fn get_batch_data( - mesh_instances: &SystemParamItem, + (mesh_instances, _, _): &SystemParamItem, entity: Entity, ) -> Option<(Self::BufferData, Option)> { let mesh_instance = mesh_instances.get(&entity)?; @@ -379,6 +388,81 @@ impl GetBatchData for Mesh2dPipeline { } } +impl GetFullBatchData for Mesh2dPipeline { + type BufferInputData = (); + + fn get_binned_batch_data( + (mesh_instances, _, _): &SystemParamItem, + entity: Entity, + ) -> Option { + let mesh_instance = mesh_instances.get(&entity)?; + Some((&mesh_instance.transforms).into()) + } + + fn get_index_and_compare_data( + _: &SystemParamItem, + _query_item: Entity, + ) -> Option<(NonMaxU32, Option)> { + error!( + "`get_index_and_compare_data` is only intended for GPU mesh uniform building, \ + but this is not yet implemented for 2d meshes" + ); + None + } + + fn get_binned_index( + _: &SystemParamItem, + _query_item: Entity, + ) -> Option { + error!( + "`get_binned_index` is only intended for GPU mesh uniform building, \ + but this is not yet implemented for 2d meshes" + ); + None + } + + fn get_batch_indirect_parameters_index( + (mesh_instances, meshes, mesh_allocator): &SystemParamItem, + indirect_parameters_buffer: &mut bevy_render::batching::gpu_preprocessing::IndirectParametersBuffer, + entity: Entity, + instance_index: u32, + ) -> Option { + let mesh_instance = mesh_instances.get(&entity)?; + let mesh = meshes.get(mesh_instance.mesh_asset_id)?; + let vertex_buffer_slice = mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id)?; + + // Note that `IndirectParameters` covers both of these structures, even + // though they actually have distinct layouts. See the comment above that + // type for more information. + let indirect_parameters = match mesh.buffer_info { + RenderMeshBufferInfo::Indexed { + count: index_count, .. + } => { + let index_buffer_slice = + mesh_allocator.mesh_index_slice(&mesh_instance.mesh_asset_id)?; + IndirectParameters { + vertex_or_index_count: index_count, + instance_count: 0, + first_vertex_or_first_index: index_buffer_slice.range.start, + base_vertex_or_first_instance: vertex_buffer_slice.range.start, + first_instance: instance_index, + } + } + RenderMeshBufferInfo::NonIndexed => IndirectParameters { + vertex_or_index_count: mesh.vertex_count, + instance_count: 0, + first_vertex_or_first_index: vertex_buffer_slice.range.start, + base_vertex_or_first_instance: instance_index, + first_instance: instance_index, + }, + }; + + (indirect_parameters_buffer.push(indirect_parameters) as u32) + .try_into() + .ok() + } +} + bitflags::bitflags! { #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[repr(transparent)]