From 0a11af937592903e8387a8972ec316a8abf1923d Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Tue, 15 Aug 2023 08:00:23 +0200 Subject: [PATCH] Reduce the size of MeshUniform to improve performance (#9416) # Objective - Significantly reduce the size of MeshUniform by only including necessary data. ## Solution Local to world, model transforms are affine. This means they only need a 4x3 matrix to represent them. `MeshUniform` stores the current, and previous model transforms, and the inverse transpose of the current model transform, all as 4x4 matrices. Instead we can store the current, and previous model transforms as 4x3 matrices, and we only need the upper-left 3x3 part of the inverse transpose of the current model transform. This change allows us to reduce the serialized MeshUniform size from 208 bytes to 144 bytes, which is over a 30% saving in data to serialize, and VRAM bandwidth and space. ## Benchmarks On an M1 Max, running `many_cubes -- sphere`, main is in yellow, this PR is in red: Screenshot 2023-08-11 at 02 36 43 A reduction in frame time of ~14%. --- ## Changelog - Changed: Redefined `MeshUniform` to improve performance by using 4x3 affine transforms and reconstructing 4x4 matrices in the shader. Helper functions were added to `bevy_pbr::mesh_functions` to unpack the data. `affine_to_square` converts a 4x3 affine transform, stored transposed as a 3x4 matrix, into a 4x4 matrix. `mat2x4_f32_to_mat3x3_unpack` unpacks a 3x3 matrix, stored as a mat2x4 plus an f32, back into a 3x3 matrix. 
## Migration Guide Shader code before: ``` var model = mesh[instance_index].model; ``` Shader code after: ``` #import bevy_pbr::mesh_functions affine_to_square var model = affine_to_square(mesh[instance_index].model); ``` --- assets/shaders/custom_vertex_attribute.wgsl | 5 +- assets/shaders/instancing.wgsl | 12 +- crates/bevy_math/src/affine3.rs | 29 +++++ crates/bevy_math/src/lib.rs | 2 + crates/bevy_pbr/src/light.rs | 6 +- crates/bevy_pbr/src/material.rs | 9 +- crates/bevy_pbr/src/prepass/mod.rs | 18 +-- crates/bevy_pbr/src/prepass/prepass.wgsl | 10 +- crates/bevy_pbr/src/render/mesh.rs | 122 +++++++++++++++--- crates/bevy_pbr/src/render/mesh.wgsl | 12 +- .../bevy_pbr/src/render/mesh_functions.wgsl | 36 +++++- crates/bevy_pbr/src/render/mesh_types.wgsl | 14 +- crates/bevy_pbr/src/render/wireframe.wgsl | 5 +- crates/bevy_pbr/src/wireframe.rs | 13 +- crates/bevy_render/src/primitives/mod.rs | 33 ++--- .../src/render_phase/rangefinder.rs | 10 +- crates/bevy_render/src/view/visibility/mod.rs | 2 +- examples/shader/shader_instancing.rs | 9 +- 18 files changed, 249 insertions(+), 98 deletions(-) create mode 100644 crates/bevy_math/src/affine3.rs diff --git a/assets/shaders/custom_vertex_attribute.wgsl b/assets/shaders/custom_vertex_attribute.wgsl index 802c28df13..79454073a6 100644 --- a/assets/shaders/custom_vertex_attribute.wgsl +++ b/assets/shaders/custom_vertex_attribute.wgsl @@ -1,6 +1,5 @@ #import bevy_pbr::mesh_bindings mesh -#import bevy_pbr::mesh_functions mesh_position_local_to_clip -#import bevy_render::instance_index +#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip struct CustomMaterial { color: vec4, @@ -23,7 +22,7 @@ struct VertexOutput { fn vertex(vertex: Vertex) -> VertexOutput { var out: VertexOutput; out.clip_position = mesh_position_local_to_clip( - mesh[bevy_render::instance_index::get_instance_index(vertex.instance_index)].model, + get_model_matrix(vertex.instance_index), vec4(vertex.position, 1.0), ); out.blend_color 
= vertex.blend_color; diff --git a/assets/shaders/instancing.wgsl b/assets/shaders/instancing.wgsl index 055c9dbf41..a785b088be 100644 --- a/assets/shaders/instancing.wgsl +++ b/assets/shaders/instancing.wgsl @@ -1,4 +1,4 @@ -#import bevy_pbr::mesh_functions mesh_position_local_to_clip +#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip #import bevy_pbr::mesh_bindings mesh struct Vertex { @@ -19,12 +19,12 @@ struct VertexOutput { fn vertex(vertex: Vertex) -> VertexOutput { let position = vertex.position * vertex.i_pos_scale.w + vertex.i_pos_scale.xyz; var out: VertexOutput; - // NOTE: The 0 index into the Mesh array is a hack for this example as the - // instance_index builtin would map to the wrong index in the Mesh array. - // This index could be passed in via another uniform instead but it's - // unnecessary for the example. + // NOTE: Passing 0 as the instance_index to get_model_matrix() is a hack + // for this example as the instance_index builtin would map to the wrong + // index in the Mesh array. This index could be passed in via another + // uniform instead but it's unnecessary for the example. out.clip_position = mesh_position_local_to_clip( - mesh[0].model, + get_model_matrix(0), vec4(position, 1.0) ); out.color = vertex.i_color; diff --git a/crates/bevy_math/src/affine3.rs b/crates/bevy_math/src/affine3.rs new file mode 100644 index 0000000000..51598e4bea --- /dev/null +++ b/crates/bevy_math/src/affine3.rs @@ -0,0 +1,29 @@ +use glam::{Affine3A, Mat3, Vec3}; + +/// Reduced-size version of `glam::Affine3A` for use when storage has +/// significant performance impact. Convert to `glam::Affine3A` to do +/// non-trivial calculations. 
+pub struct Affine3 { + /// Scaling, rotation, shears, and other non-translation affine transforms + pub matrix3: Mat3, + /// Translation + pub translation: Vec3, +} + +impl From<&Affine3A> for Affine3 { + fn from(affine: &Affine3A) -> Self { + Self { + matrix3: affine.matrix3.into(), + translation: affine.translation.into(), + } + } +} + +impl From<&Affine3> for Affine3A { + fn from(affine3: &Affine3) -> Self { + Self { + matrix3: affine3.matrix3.into(), + translation: affine3.translation.into(), + } + } +} diff --git a/crates/bevy_math/src/lib.rs b/crates/bevy_math/src/lib.rs index de0bc681c8..1c75dc015d 100644 --- a/crates/bevy_math/src/lib.rs +++ b/crates/bevy_math/src/lib.rs @@ -7,10 +7,12 @@ #![allow(clippy::type_complexity)] #![warn(missing_docs)] +mod affine3; pub mod cubic_splines; mod ray; mod rects; +pub use affine3::*; pub use ray::Ray; pub use rects::*; diff --git a/crates/bevy_pbr/src/light.rs b/crates/bevy_pbr/src/light.rs index 3e8c0d4517..08b21db7a3 100644 --- a/crates/bevy_pbr/src/light.rs +++ b/crates/bevy_pbr/src/light.rs @@ -2025,7 +2025,7 @@ pub fn check_light_mesh_visibility( view_frusta.iter().zip(view_visible_entities) { // Disable near-plane culling, as a shadow caster could lie before the near plane. 
- if !frustum.intersects_obb(aabb, &transform.compute_matrix(), false, true) { + if !frustum.intersects_obb(aabb, &transform.affine(), false, true) { continue; } @@ -2098,7 +2098,7 @@ pub fn check_light_mesh_visibility( // If we have an aabb and transform, do frustum culling if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) { - let model_to_world = transform.compute_matrix(); + let model_to_world = transform.affine(); // Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light if !light_sphere.intersects_obb(aabb, &model_to_world) { continue; @@ -2162,7 +2162,7 @@ pub fn check_light_mesh_visibility( // If we have an aabb and transform, do frustum culling if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) { - let model_to_world = transform.compute_matrix(); + let model_to_world = transform.affine(); // Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light if !light_sphere.intersects_obb(aabb, &model_to_world) { continue; diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index e14c9f9e9d..13e736d139 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -1,6 +1,6 @@ use crate::{ render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshPipeline, MeshPipelineKey, - MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems, + MeshTransforms, MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems, ScreenSpaceAmbientOcclusionSettings, SetMeshBindGroup, SetMeshViewBindGroup, Shadow, }; use bevy_app::{App, Plugin}; @@ -382,7 +382,7 @@ pub fn queue_material_meshes( material_meshes: Query<( &Handle, &Handle, - &MeshUniform, + &MeshTransforms, &GpuArrayBufferIndex, )>, images: Res>, @@ -468,7 +468,7 @@ pub fn queue_material_meshes( let rangefinder = view.rangefinder3d(); for visible_entity in &visible_entities.entities { - if let Ok((material_handle, mesh_handle, mesh_uniform, 
batch_indices)) = + if let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) = material_meshes.get(*visible_entity) { if let (Some(mesh), Some(material)) = ( @@ -516,7 +516,8 @@ pub fn queue_material_meshes( } }; - let distance = rangefinder.distance(&mesh_uniform.transform) + let distance = rangefinder + .distance_translation(&mesh_transforms.transform.translation) + material.properties.depth_bias; match material.properties.alpha_mode { AlphaMode::Opaque => { diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 9b1bde4b26..bfdd3ff003 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -15,7 +15,7 @@ use bevy_ecs::{ SystemParamItem, }, }; -use bevy_math::Mat4; +use bevy_math::{Affine3A, Mat4}; use bevy_reflect::TypeUuid; use bevy_render::{ globals::{GlobalsBuffer, GlobalsUniform}, @@ -46,8 +46,8 @@ use bevy_utils::tracing::error; use crate::{ prepare_lights, setup_morph_and_skinning_defs, AlphaMode, DrawMesh, Material, MaterialPipeline, - MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshUniform, RenderMaterials, - SetMaterialBindGroup, SetMeshBindGroup, + MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshTransforms, MeshUniform, + RenderMaterials, SetMaterialBindGroup, SetMeshBindGroup, }; use std::{hash::Hash, marker::PhantomData}; @@ -203,7 +203,7 @@ pub fn update_previous_view_projections( } #[derive(Component)] -pub struct PreviousGlobalTransform(pub Mat4); +pub struct PreviousGlobalTransform(pub Affine3A); pub fn update_mesh_previous_global_transforms( mut commands: Commands, @@ -216,7 +216,7 @@ pub fn update_mesh_previous_global_transforms( for (entity, transform) in &meshes { commands .entity(entity) - .insert(PreviousGlobalTransform(transform.compute_matrix())); + .insert(PreviousGlobalTransform(transform.affine())); } } } @@ -762,7 +762,7 @@ pub fn queue_prepass_material_meshes( material_meshes: Query<( &Handle, &Handle, - 
&MeshUniform, + &MeshTransforms, &GpuArrayBufferIndex, )>, mut views: Query<( @@ -809,7 +809,7 @@ pub fn queue_prepass_material_meshes( let rangefinder = view.rangefinder3d(); for visible_entity in &visible_entities.entities { - let Ok((material_handle, mesh_handle, mesh_uniform, batch_indices)) = material_meshes.get(*visible_entity) else { + let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) = material_meshes.get(*visible_entity) else { continue; }; @@ -852,8 +852,8 @@ pub fn queue_prepass_material_meshes( } }; - let distance = - rangefinder.distance(&mesh_uniform.transform) + material.properties.depth_bias; + let distance = rangefinder.distance_translation(&mesh_transforms.transform.translation) + + material.properties.depth_bias; match alpha_mode { AlphaMode::Opaque => { opaque_phase.add(Opaque3dPrepass { diff --git a/crates/bevy_pbr/src/prepass/prepass.wgsl b/crates/bevy_pbr/src/prepass/prepass.wgsl index 82fa387f12..396935ff41 100644 --- a/crates/bevy_pbr/src/prepass/prepass.wgsl +++ b/crates/bevy_pbr/src/prepass/prepass.wgsl @@ -3,7 +3,7 @@ #import bevy_pbr::skinning #import bevy_pbr::morph #import bevy_pbr::mesh_bindings mesh -#import bevy_render::instance_index +#import bevy_render::instance_index get_instance_index // Most of these attributes are not used in the default prepass fragment shader, but they are still needed so we can // pass them to custom prepass shaders like pbr_prepass.wgsl. @@ -92,7 +92,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput { #else // SKINNED // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. 
// See https://github.com/gfx-rs/naga/issues/2416 - var model = mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].model; + var model = bevy_pbr::mesh_functions::get_model_matrix(vertex_no_morph.instance_index); #endif // SKINNED out.clip_position = bevy_pbr::mesh_functions::mesh_position_local_to_clip(model, vec4(vertex.position, 1.0)); @@ -113,7 +113,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput { vertex.normal, // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. // See https://github.com/gfx-rs/naga/issues/2416 - bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index) + get_instance_index(vertex_no_morph.instance_index) ); #endif // SKINNED @@ -123,7 +123,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput { vertex.tangent, // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. // See https://github.com/gfx-rs/naga/issues/2416 - bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index) + get_instance_index(vertex_no_morph.instance_index) ); #endif // VERTEX_TANGENTS #endif // NORMAL_PREPASS @@ -133,7 +133,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput { // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. 
// See https://github.com/gfx-rs/naga/issues/2416 out.previous_world_position = bevy_pbr::mesh_functions::mesh_position_local_to_world( - mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].previous_model, + bevy_pbr::mesh_functions::get_previous_model_matrix(vertex_no_morph.instance_index), vec4(vertex.position, 1.0) ); #endif // MOTION_VECTOR_PREPASS diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 46693e8097..6c61f05b8f 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -18,11 +18,10 @@ use bevy_ecs::{ query::ROQueryItem, system::{lifetimeless::*, SystemParamItem, SystemState}, }; -use bevy_math::{Mat3A, Mat4, Vec2}; +use bevy_math::{Affine3, Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4}; use bevy_reflect::TypeUuid; use bevy_render::{ globals::{GlobalsBuffer, GlobalsUniform}, - gpu_component_array_buffer::GpuComponentArrayBufferPlugin, mesh::{ skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, GpuBufferInfo, InnerMeshVertexBufferLayout, Mesh, MeshVertexBufferLayout, @@ -115,8 +114,6 @@ impl Plugin for MeshRenderPlugin { load_internal_asset!(app, SKINNING_HANDLE, "skinning.wgsl", Shader::from_wgsl); load_internal_asset!(app, MORPH_HANDLE, "morph.wgsl", Shader::from_wgsl); - app.add_plugins(GpuComponentArrayBufferPlugin::::default()); - if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { render_app .init_resource::() @@ -129,6 +126,7 @@ impl Plugin for MeshRenderPlugin { .add_systems( Render, ( + prepare_mesh_uniforms.in_set(RenderSet::Prepare), prepare_skinned_meshes.in_set(RenderSet::Prepare), prepare_morphs.in_set(RenderSet::Prepare), queue_mesh_bind_group.in_set(RenderSet::Queue), @@ -151,7 +149,11 @@ impl Plugin for MeshRenderPlugin { )); } - render_app.init_resource::(); + render_app + .insert_resource(GpuArrayBuffer::::new( + render_app.world.resource::(), + )) + .init_resource::(); } // Load the mesh_bindings shader module here as it 
depends on runtime information about @@ -166,14 +168,76 @@ impl Plugin for MeshRenderPlugin { } } -#[derive(Component, ShaderType, Clone)] -pub struct MeshUniform { - pub transform: Mat4, - pub previous_transform: Mat4, - pub inverse_transpose_model: Mat4, +#[derive(Component)] +pub struct MeshTransforms { + pub transform: Affine3, + pub previous_transform: Affine3, pub flags: u32, } +#[derive(ShaderType, Clone)] +pub struct MeshUniform { + // Affine 4x3 matrices transposed to 3x4 + pub transform: [Vec4; 3], + pub previous_transform: [Vec4; 3], + // 3x3 matrix packed in mat2x4 and f32 as: + // [0].xyz, [1].x, + // [1].yz, [2].xy + // [2].z + pub inverse_transpose_model_a: [Vec4; 2], + pub inverse_transpose_model_b: f32, + pub flags: u32, +} + +impl From<&MeshTransforms> for MeshUniform { + fn from(mesh_transforms: &MeshTransforms) -> Self { + let transpose_model_3x3 = mesh_transforms.transform.matrix3.transpose(); + let transpose_previous_model_3x3 = mesh_transforms.previous_transform.matrix3.transpose(); + let inverse_transpose_model_3x3 = Affine3A::from(&mesh_transforms.transform) + .inverse() + .matrix3 + .transpose(); + Self { + transform: [ + transpose_model_3x3 + .x_axis + .extend(mesh_transforms.transform.translation.x), + transpose_model_3x3 + .y_axis + .extend(mesh_transforms.transform.translation.y), + transpose_model_3x3 + .z_axis + .extend(mesh_transforms.transform.translation.z), + ], + previous_transform: [ + transpose_previous_model_3x3 + .x_axis + .extend(mesh_transforms.previous_transform.translation.x), + transpose_previous_model_3x3 + .y_axis + .extend(mesh_transforms.previous_transform.translation.y), + transpose_previous_model_3x3 + .z_axis + .extend(mesh_transforms.previous_transform.translation.z), + ], + inverse_transpose_model_a: [ + ( + inverse_transpose_model_3x3.x_axis, + inverse_transpose_model_3x3.y_axis.x, + ) + .into(), + ( + inverse_transpose_model_3x3.y_axis.yz(), + inverse_transpose_model_3x3.z_axis.xy(), + ) + .into(), + ], + 
inverse_transpose_model_b: inverse_transpose_model_3x3.z_axis.z, + flags: mesh_transforms.flags, + } + } +} + // NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_types.wgsl! bitflags::bitflags! { #[repr(transparent)] @@ -210,26 +274,25 @@ pub fn extract_meshes( for (entity, _, transform, previous_transform, handle, not_receiver, not_caster) in visible_meshes { - let transform = transform.compute_matrix(); + let transform = transform.affine(); let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform); let mut flags = if not_receiver.is_some() { MeshFlags::empty() } else { MeshFlags::SHADOW_RECEIVER }; - if Mat3A::from_mat4(transform).determinant().is_sign_positive() { + if transform.matrix3.determinant().is_sign_positive() { flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; } - let uniform = MeshUniform { + let transforms = MeshTransforms { + transform: (&transform).into(), + previous_transform: (&previous_transform).into(), flags: flags.bits(), - transform, - previous_transform, - inverse_transpose_model: transform.inverse().transpose(), }; if not_caster.is_some() { - not_caster_commands.push((entity, (handle.clone_weak(), uniform, NotShadowCaster))); + not_caster_commands.push((entity, (handle.clone_weak(), transforms, NotShadowCaster))); } else { - caster_commands.push((entity, (handle.clone_weak(), uniform))); + caster_commands.push((entity, (handle.clone_weak(), transforms))); } } *prev_caster_commands_len = caster_commands.len(); @@ -317,6 +380,29 @@ pub fn extract_skinned_meshes( commands.insert_or_spawn_batch(values); } +fn prepare_mesh_uniforms( + mut commands: Commands, + render_device: Res, + render_queue: Res, + mut gpu_array_buffer: ResMut>, + components: Query<(Entity, &MeshTransforms)>, +) { + gpu_array_buffer.clear(); + + let entities = components + .iter() + .map(|(entity, mesh_transforms)| { + ( + entity, + gpu_array_buffer.push(MeshUniform::from(mesh_transforms)), + ) + }) + .collect::>(); + 
commands.insert_or_spawn_batch(entities); + + gpu_array_buffer.write_buffer(&render_device, &render_queue); +} + #[derive(Resource, Clone)] pub struct MeshPipeline { pub view_layout: BindGroupLayout, diff --git a/crates/bevy_pbr/src/render/mesh.wgsl b/crates/bevy_pbr/src/render/mesh.wgsl index 106ec54ae0..0755c6859e 100644 --- a/crates/bevy_pbr/src/render/mesh.wgsl +++ b/crates/bevy_pbr/src/render/mesh.wgsl @@ -3,7 +3,7 @@ #import bevy_pbr::morph #import bevy_pbr::mesh_bindings mesh #import bevy_pbr::mesh_vertex_output MeshVertexOutput -#import bevy_render::instance_index +#import bevy_render::instance_index get_instance_index struct Vertex { @builtin(instance_index) instance_index: u32, @@ -66,8 +66,8 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput { var model = bevy_pbr::skinning::skin_model(vertex.joint_indices, vertex.joint_weights); #else // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. - // See https://github.com/gfx-rs/naga/issues/2416 - var model = mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].model; + // See https://github.com/gfx-rs/naga/issues/2416 . + var model = mesh_functions::get_model_matrix(vertex_no_morph.instance_index); #endif #ifdef VERTEX_NORMALS @@ -78,7 +78,7 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput { vertex.normal, // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. // See https://github.com/gfx-rs/naga/issues/2416 - bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index) + get_instance_index(vertex_no_morph.instance_index) ); #endif #endif @@ -98,7 +98,7 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput { vertex.tangent, // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. 
// See https://github.com/gfx-rs/naga/issues/2416 - bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index) + get_instance_index(vertex_no_morph.instance_index) ); #endif @@ -109,7 +109,7 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput { #ifdef VERTEX_OUTPUT_INSTANCE_INDEX // Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug. // See https://github.com/gfx-rs/naga/issues/2416 - out.instance_index = bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index); + out.instance_index = get_instance_index(vertex_no_morph.instance_index); #endif return out; diff --git a/crates/bevy_pbr/src/render/mesh_functions.wgsl b/crates/bevy_pbr/src/render/mesh_functions.wgsl index 4b5d1da7bd..ff3a616335 100644 --- a/crates/bevy_pbr/src/render/mesh_functions.wgsl +++ b/crates/bevy_pbr/src/render/mesh_functions.wgsl @@ -3,6 +3,35 @@ #import bevy_pbr::mesh_view_bindings view #import bevy_pbr::mesh_bindings mesh #import bevy_pbr::mesh_types MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT +#import bevy_render::instance_index get_instance_index + +fn affine_to_square(affine: mat3x4) -> mat4x4 { + return transpose(mat4x4( + affine[0], + affine[1], + affine[2], + vec4(0.0, 0.0, 0.0, 1.0), + )); +} + +fn mat2x4_f32_to_mat3x3_unpack( + a: mat2x4, + b: f32, +) -> mat3x3 { + return mat3x3( + a[0].xyz, + vec3(a[0].w, a[1].xy), + vec3(a[1].zw, b), + ); +} + +fn get_model_matrix(instance_index: u32) -> mat4x4 { + return affine_to_square(mesh[get_instance_index(instance_index)].model); +} + +fn get_previous_model_matrix(instance_index: u32) -> mat4x4 { + return affine_to_square(mesh[get_instance_index(instance_index)].previous_model); +} fn mesh_position_local_to_world(model: mat4x4, vertex_position: vec4) -> vec4 { return model * vertex_position; @@ -28,10 +57,9 @@ fn mesh_normal_local_to_world(vertex_normal: vec3, instance_index: u32) -> // unless you really know what you are doing. 
// http://www.mikktspace.com/ return normalize( - mat3x3( - mesh[instance_index].inverse_transpose_model[0].xyz, - mesh[instance_index].inverse_transpose_model[1].xyz, - mesh[instance_index].inverse_transpose_model[2].xyz + mat2x4_f32_to_mat3x3_unpack( + mesh[instance_index].inverse_transpose_model_a, + mesh[instance_index].inverse_transpose_model_b, ) * vertex_normal ); } diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index f8a775cc8f..9b50336473 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -1,9 +1,17 @@ #define_import_path bevy_pbr::mesh_types struct Mesh { - model: mat4x4, - previous_model: mat4x4, - inverse_transpose_model: mat4x4, + // Affine 4x3 matrices transposed to 3x4 + // Use bevy_pbr::mesh_functions::affine_to_square to unpack + model: mat3x4, + previous_model: mat3x4, + // 3x3 matrix packed in mat2x4 and f32 as: + // [0].xyz, [1].x, + // [1].yz, [2].xy + // [2].z + // Use bevy_pbr::mesh_functions::mat2x4_f32_to_mat3x3_unpack to unpack + inverse_transpose_model_a: mat2x4, + inverse_transpose_model_b: f32, // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. 
flags: u32, }; diff --git a/crates/bevy_pbr/src/render/wireframe.wgsl b/crates/bevy_pbr/src/render/wireframe.wgsl index 2b4f112608..d44842b0b2 100644 --- a/crates/bevy_pbr/src/render/wireframe.wgsl +++ b/crates/bevy_pbr/src/render/wireframe.wgsl @@ -1,6 +1,5 @@ #import bevy_pbr::mesh_bindings mesh -#import bevy_pbr::mesh_functions mesh_position_local_to_clip -#import bevy_render::instance_index +#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip #ifdef SKINNED #import bevy_pbr::skinning @@ -24,7 +23,7 @@ fn vertex(vertex: Vertex) -> VertexOutput { #ifdef SKINNED let model = bevy_pbr::skinning::skin_model(vertex.joint_indexes, vertex.joint_weights); #else - let model = mesh[bevy_render::instance_index::get_instance_index(vertex.instance_index)].model; + let model = get_model_matrix(vertex.instance_index); #endif var out: VertexOutput; diff --git a/crates/bevy_pbr/src/wireframe.rs b/crates/bevy_pbr/src/wireframe.rs index 1a6757abfe..cefb2c83d3 100644 --- a/crates/bevy_pbr/src/wireframe.rs +++ b/crates/bevy_pbr/src/wireframe.rs @@ -1,5 +1,5 @@ -use crate::MeshPipeline; use crate::{DrawMesh, MeshPipelineKey, MeshUniform, SetMeshBindGroup, SetMeshViewBindGroup}; +use crate::{MeshPipeline, MeshTransforms}; use bevy_app::Plugin; use bevy_asset::{load_internal_asset, Handle, HandleUntyped}; use bevy_core_pipeline::core_3d::Opaque3d; @@ -121,14 +121,14 @@ fn queue_wireframes( Query<( Entity, &Handle, - &MeshUniform, + &MeshTransforms, &GpuArrayBufferIndex, )>, Query< ( Entity, &Handle, - &MeshUniform, + &MeshTransforms, &GpuArrayBufferIndex, ), With, @@ -142,10 +142,10 @@ fn queue_wireframes( let rangefinder = view.rangefinder3d(); let view_key = msaa_key | MeshPipelineKey::from_hdr(view.hdr); - let add_render_phase = |(entity, mesh_handle, mesh_transforms, batch_indices): ( + let add_render_phase = |(entity, mesh_handle, mesh_transforms, batch_indices): ( Entity, &Handle, - &MeshUniform, + &MeshTransforms, &GpuArrayBufferIndex, )| { if let 
Some(mesh) = render_meshes.get(mesh_handle) { @@ -164,7 +164,8 @@ fn queue_wireframes( entity, pipeline: pipeline_id, draw_function: draw_custom, - distance: rangefinder.distance(&mesh_uniform.transform), + distance: rangefinder + .distance_translation(&mesh_transforms.transform.translation), per_object_binding_dynamic_offset: batch_indices .dynamic_offset .unwrap_or_default(), diff --git a/crates/bevy_render/src/primitives/mod.rs b/crates/bevy_render/src/primitives/mod.rs index fc447a90cf..2cb067ded1 100644 --- a/crates/bevy_render/src/primitives/mod.rs +++ b/crates/bevy_render/src/primitives/mod.rs @@ -1,5 +1,5 @@ use bevy_ecs::{component::Component, prelude::Entity, reflect::ReflectComponent}; -use bevy_math::{Mat4, Vec3, Vec3A, Vec4, Vec4Swizzles}; +use bevy_math::{Affine3A, Mat3A, Mat4, Vec3, Vec3A, Vec4, Vec4Swizzles}; use bevy_reflect::Reflect; use bevy_utils::HashMap; @@ -26,13 +26,13 @@ impl Aabb { /// Calculate the relative radius of the AABB with respect to a plane #[inline] - pub fn relative_radius(&self, p_normal: &Vec3A, axes: &[Vec3A]) -> f32 { + pub fn relative_radius(&self, p_normal: &Vec3A, model: &Mat3A) -> f32 { // NOTE: dot products on Vec3A use SIMD and even with the overhead of conversion are net faster than Vec3 let half_extents = self.half_extents; Vec3A::new( - p_normal.dot(axes[0]), - p_normal.dot(axes[1]), - p_normal.dot(axes[2]), + p_normal.dot(model.x_axis), + p_normal.dot(model.y_axis), + p_normal.dot(model.z_axis), ) .abs() .dot(half_extents) @@ -67,16 +67,11 @@ pub struct Sphere { impl Sphere { #[inline] - pub fn intersects_obb(&self, aabb: &Aabb, local_to_world: &Mat4) -> bool { - let aabb_center_world = *local_to_world * aabb.center.extend(1.0); - let axes = [ - Vec3A::from(local_to_world.x_axis), - Vec3A::from(local_to_world.y_axis), - Vec3A::from(local_to_world.z_axis), - ]; - let v = Vec3A::from(aabb_center_world) - self.center; + pub fn intersects_obb(&self, aabb: &Aabb, local_to_world: &Affine3A) -> bool { + let 
aabb_center_world = local_to_world.transform_point3a(aabb.center); + let v = aabb_center_world - self.center; let d = v.length(); - let relative_radius = aabb.relative_radius(&(v / d), &axes); + let relative_radius = aabb.relative_radius(&(v / d), &local_to_world.matrix3); d < self.radius + relative_radius } } @@ -195,17 +190,11 @@ impl Frustum { pub fn intersects_obb( &self, aabb: &Aabb, - model_to_world: &Mat4, + model_to_world: &Affine3A, intersect_near: bool, intersect_far: bool, ) -> bool { let aabb_center_world = model_to_world.transform_point3a(aabb.center).extend(1.0); - let axes = [ - Vec3A::from(model_to_world.x_axis), - Vec3A::from(model_to_world.y_axis), - Vec3A::from(model_to_world.z_axis), - ]; - for (idx, half_space) in self.half_spaces.into_iter().enumerate() { if idx == 4 && !intersect_near { continue; @@ -214,7 +203,7 @@ impl Frustum { continue; } let p_normal = half_space.normal(); - let relative_radius = aabb.relative_radius(&p_normal, &axes); + let relative_radius = aabb.relative_radius(&p_normal, &model_to_world.matrix3); if half_space.normal_d().dot(aabb_center_world) + relative_radius <= 0.0 { return false; } diff --git a/crates/bevy_render/src/render_phase/rangefinder.rs b/crates/bevy_render/src/render_phase/rangefinder.rs index 797782b9cc..40e75183c8 100644 --- a/crates/bevy_render/src/render_phase/rangefinder.rs +++ b/crates/bevy_render/src/render_phase/rangefinder.rs @@ -1,4 +1,4 @@ -use bevy_math::{Mat4, Vec4}; +use bevy_math::{Mat4, Vec3, Vec4}; /// A distance calculator for the draw order of [`PhaseItem`](crate::render_phase::PhaseItem)s. pub struct ViewRangefinder3d { @@ -15,6 +15,14 @@ impl ViewRangefinder3d { } } + /// Calculates the distance, or view-space `Z` value, for the given `translation`. 
+ #[inline] + pub fn distance_translation(&self, translation: &Vec3) -> f32 { + // NOTE: row 2 of the inverse view matrix dotted with the translation from the model matrix + // gives the z component of translation of the mesh in view-space + self.inverse_view_row_2.dot(translation.extend(1.0)) + } + /// Calculates the distance, or view-space `Z` value, for the given `transform`. #[inline] pub fn distance(&self, transform: &Mat4) -> f32 { diff --git a/crates/bevy_render/src/view/visibility/mod.rs b/crates/bevy_render/src/view/visibility/mod.rs index 0e99f1cc09..9a58b4ada7 100644 --- a/crates/bevy_render/src/view/visibility/mod.rs +++ b/crates/bevy_render/src/view/visibility/mod.rs @@ -392,7 +392,7 @@ pub fn check_visibility( // If we have an aabb and transform, do frustum culling if maybe_no_frustum_culling.is_none() { - let model = transform.compute_matrix(); + let model = transform.affine(); let model_sphere = Sphere { center: model.transform_point3a(model_aabb.center), radius: transform.radius_vec3a(model_aabb.half_extents), diff --git a/examples/shader/shader_instancing.rs b/examples/shader/shader_instancing.rs index 389427f9ee..925c0c14d3 100644 --- a/examples/shader/shader_instancing.rs +++ b/examples/shader/shader_instancing.rs @@ -6,7 +6,7 @@ use bevy::{ query::QueryItem, system::{lifetimeless::*, SystemParamItem}, }, - pbr::{MeshPipeline, MeshPipelineKey, MeshUniform, SetMeshBindGroup, SetMeshViewBindGroup}, + pbr::{MeshPipeline, MeshPipelineKey, MeshTransforms, SetMeshBindGroup, SetMeshViewBindGroup}, prelude::*, render::{ extract_component::{ExtractComponent, ExtractComponentPlugin}, @@ -113,7 +113,7 @@ fn queue_custom( mut pipelines: ResMut>, pipeline_cache: Res, meshes: Res>, - material_meshes: Query<(Entity, &MeshUniform, &Handle), With>, + material_meshes: Query<(Entity, &MeshTransforms, &Handle), With>, mut views: Query<(&ExtractedView, &mut RenderPhase)>, ) { let draw_custom = transparent_3d_draw_functions.read().id::(); @@ -123,7 +123,7 @@ fn 
queue_custom( for (view, mut transparent_phase) in &mut views { let view_key = msaa_key | MeshPipelineKey::from_hdr(view.hdr); let rangefinder = view.rangefinder3d(); - for (entity, mesh_uniform, mesh_handle) in &material_meshes { + for (entity, mesh_transforms, mesh_handle) in &material_meshes { if let Some(mesh) = meshes.get(mesh_handle) { let key = view_key | MeshPipelineKey::from_primitive_topology(mesh.primitive_topology); @@ -134,7 +134,8 @@ fn queue_custom( entity, pipeline, draw_function: draw_custom, - distance: rangefinder.distance(&mesh_uniform.transform), + distance: rangefinder + .distance_translation(&mesh_transforms.transform.translation), }); } }