Reduce the size of MeshUniform to improve performance (#9416)

# Objective

- Significantly reduce the size of MeshUniform by only including
necessary data.

## Solution

Local-to-world (model) transforms are affine, so a 4x3 matrix is enough
to represent them.

`MeshUniform` stores the current, and previous model transforms, and the
inverse transpose of the current model transform, all as 4x4 matrices.
Instead we can store the current, and previous model transforms as 4x3
matrices, and we only need the upper-left 3x3 part of the inverse
transpose of the current model transform. This change allows us to
reduce the serialized MeshUniform size from 208 bytes to 144 bytes,
which is over a 30% saving in data to serialize, and VRAM bandwidth and
space.

## Benchmarks

On an M1 Max, running `many_cubes -- sphere`, main is in yellow, this PR
is in red:
<img width="1484" alt="Screenshot 2023-08-11 at 02 36 43"
src="https://github.com/bevyengine/bevy/assets/302146/7d99c7b3-f2bb-4004-a8d0-4c00f755cb0d">
A reduction in frame time of ~14%.

---

## Changelog

- Changed: Redefined `MeshUniform` to improve performance by using 4x3
affine transforms and reconstructing 4x4 matrices in the shader. Helper
functions were added to `bevy_pbr::mesh_functions` to unpack the data.
`affine_to_square` converts the 4x3 affine data, stored transposed as a
3x4 matrix, to a 4x4 matrix. `mat2x4_f32_to_mat3x3_unpack` converts the
3x3 matrix packed into a mat2x4 + f32 back into a 3x3. `get_model_matrix`
and `get_previous_model_matrix` look up a mesh by instance index and
return the unpacked 4x4 model matrix.

## Migration Guide

Shader code before:
```
var model = mesh[instance_index].model;
```

Shader code after:
```
#import bevy_pbr::mesh_functions affine_to_square

var model = affine_to_square(mesh[instance_index].model);
```
This commit is contained in:
Robert Swain 2023-08-15 08:00:23 +02:00 committed by GitHub
parent b30ff2ab76
commit 0a11af9375
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 249 additions and 98 deletions

View file

@ -1,6 +1,5 @@
#import bevy_pbr::mesh_bindings mesh
#import bevy_pbr::mesh_functions mesh_position_local_to_clip
#import bevy_render::instance_index
#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip
struct CustomMaterial {
color: vec4<f32>,
@ -23,7 +22,7 @@ struct VertexOutput {
fn vertex(vertex: Vertex) -> VertexOutput {
var out: VertexOutput;
out.clip_position = mesh_position_local_to_clip(
mesh[bevy_render::instance_index::get_instance_index(vertex.instance_index)].model,
get_model_matrix(vertex.instance_index),
vec4<f32>(vertex.position, 1.0),
);
out.blend_color = vertex.blend_color;

View file

@ -1,4 +1,4 @@
#import bevy_pbr::mesh_functions mesh_position_local_to_clip
#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip
#import bevy_pbr::mesh_bindings mesh
struct Vertex {
@ -19,12 +19,12 @@ struct VertexOutput {
fn vertex(vertex: Vertex) -> VertexOutput {
let position = vertex.position * vertex.i_pos_scale.w + vertex.i_pos_scale.xyz;
var out: VertexOutput;
// NOTE: The 0 index into the Mesh array is a hack for this example as the
// instance_index builtin would map to the wrong index in the Mesh array.
// This index could be passed in via another uniform instead but it's
// unnecessary for the example.
// NOTE: Passing 0 as the instance_index to get_model_matrix() is a hack
// for this example as the instance_index builtin would map to the wrong
// index in the Mesh array. This index could be passed in via another
// uniform instead but it's unnecessary for the example.
out.clip_position = mesh_position_local_to_clip(
mesh[0].model,
get_model_matrix(0),
vec4<f32>(position, 1.0)
);
out.color = vertex.i_color;

View file

@ -0,0 +1,29 @@
use glam::{Affine3A, Mat3, Vec3};
/// Reduced-size version of `glam::Affine3A` for use when storage has
/// significant performance impact. Convert to `glam::Affine3A` to do
/// non-trivial calculations.
///
/// Conversions in both directions are provided through `From` on references.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Affine3 {
    /// Scaling, rotation, shears, and other non-translation affine transforms
    pub matrix3: Mat3,
    /// Translation
    pub translation: Vec3,
}
impl From<&Affine3A> for Affine3 {
fn from(affine: &Affine3A) -> Self {
Self {
matrix3: affine.matrix3.into(),
translation: affine.translation.into(),
}
}
}
impl From<&Affine3> for Affine3A {
fn from(affine3: &Affine3) -> Self {
Self {
matrix3: affine3.matrix3.into(),
translation: affine3.translation.into(),
}
}
}

View file

@ -7,10 +7,12 @@
#![allow(clippy::type_complexity)]
#![warn(missing_docs)]
mod affine3;
pub mod cubic_splines;
mod ray;
mod rects;
pub use affine3::*;
pub use ray::Ray;
pub use rects::*;

View file

@ -2025,7 +2025,7 @@ pub fn check_light_mesh_visibility(
view_frusta.iter().zip(view_visible_entities)
{
// Disable near-plane culling, as a shadow caster could lie before the near plane.
if !frustum.intersects_obb(aabb, &transform.compute_matrix(), false, true) {
if !frustum.intersects_obb(aabb, &transform.affine(), false, true) {
continue;
}
@ -2098,7 +2098,7 @@ pub fn check_light_mesh_visibility(
// If we have an aabb and transform, do frustum culling
if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) {
let model_to_world = transform.compute_matrix();
let model_to_world = transform.affine();
// Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light
if !light_sphere.intersects_obb(aabb, &model_to_world) {
continue;
@ -2162,7 +2162,7 @@ pub fn check_light_mesh_visibility(
// If we have an aabb and transform, do frustum culling
if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) {
let model_to_world = transform.compute_matrix();
let model_to_world = transform.affine();
// Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light
if !light_sphere.intersects_obb(aabb, &model_to_world) {
continue;

View file

@ -1,6 +1,6 @@
use crate::{
render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshPipeline, MeshPipelineKey,
MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems,
MeshTransforms, MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems,
ScreenSpaceAmbientOcclusionSettings, SetMeshBindGroup, SetMeshViewBindGroup, Shadow,
};
use bevy_app::{App, Plugin};
@ -382,7 +382,7 @@ pub fn queue_material_meshes<M: Material>(
material_meshes: Query<(
&Handle<M>,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)>,
images: Res<RenderAssets<Image>>,
@ -468,7 +468,7 @@ pub fn queue_material_meshes<M: Material>(
let rangefinder = view.rangefinder3d();
for visible_entity in &visible_entities.entities {
if let Ok((material_handle, mesh_handle, mesh_uniform, batch_indices)) =
if let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) =
material_meshes.get(*visible_entity)
{
if let (Some(mesh), Some(material)) = (
@ -516,7 +516,8 @@ pub fn queue_material_meshes<M: Material>(
}
};
let distance = rangefinder.distance(&mesh_uniform.transform)
let distance = rangefinder
.distance_translation(&mesh_transforms.transform.translation)
+ material.properties.depth_bias;
match material.properties.alpha_mode {
AlphaMode::Opaque => {

View file

@ -15,7 +15,7 @@ use bevy_ecs::{
SystemParamItem,
},
};
use bevy_math::Mat4;
use bevy_math::{Affine3A, Mat4};
use bevy_reflect::TypeUuid;
use bevy_render::{
globals::{GlobalsBuffer, GlobalsUniform},
@ -46,8 +46,8 @@ use bevy_utils::tracing::error;
use crate::{
prepare_lights, setup_morph_and_skinning_defs, AlphaMode, DrawMesh, Material, MaterialPipeline,
MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshUniform, RenderMaterials,
SetMaterialBindGroup, SetMeshBindGroup,
MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshTransforms, MeshUniform,
RenderMaterials, SetMaterialBindGroup, SetMeshBindGroup,
};
use std::{hash::Hash, marker::PhantomData};
@ -203,7 +203,7 @@ pub fn update_previous_view_projections(
}
#[derive(Component)]
pub struct PreviousGlobalTransform(pub Mat4);
pub struct PreviousGlobalTransform(pub Affine3A);
pub fn update_mesh_previous_global_transforms(
mut commands: Commands,
@ -216,7 +216,7 @@ pub fn update_mesh_previous_global_transforms(
for (entity, transform) in &meshes {
commands
.entity(entity)
.insert(PreviousGlobalTransform(transform.compute_matrix()));
.insert(PreviousGlobalTransform(transform.affine()));
}
}
}
@ -762,7 +762,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
material_meshes: Query<(
&Handle<M>,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)>,
mut views: Query<(
@ -809,7 +809,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
let rangefinder = view.rangefinder3d();
for visible_entity in &visible_entities.entities {
let Ok((material_handle, mesh_handle, mesh_uniform, batch_indices)) = material_meshes.get(*visible_entity) else {
let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) = material_meshes.get(*visible_entity) else {
continue;
};
@ -852,8 +852,8 @@ pub fn queue_prepass_material_meshes<M: Material>(
}
};
let distance =
rangefinder.distance(&mesh_uniform.transform) + material.properties.depth_bias;
let distance = rangefinder.distance_translation(&mesh_transforms.transform.translation)
+ material.properties.depth_bias;
match alpha_mode {
AlphaMode::Opaque => {
opaque_phase.add(Opaque3dPrepass {

View file

@ -3,7 +3,7 @@
#import bevy_pbr::skinning
#import bevy_pbr::morph
#import bevy_pbr::mesh_bindings mesh
#import bevy_render::instance_index
#import bevy_render::instance_index get_instance_index
// Most of these attributes are not used in the default prepass fragment shader, but they are still needed so we can
// pass them to custom prepass shaders like pbr_prepass.wgsl.
@ -92,7 +92,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
#else // SKINNED
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
var model = mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].model;
var model = bevy_pbr::mesh_functions::get_model_matrix(vertex_no_morph.instance_index);
#endif // SKINNED
out.clip_position = bevy_pbr::mesh_functions::mesh_position_local_to_clip(model, vec4(vertex.position, 1.0));
@ -113,7 +113,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
vertex.normal,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif // SKINNED
@ -123,7 +123,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
vertex.tangent,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif // VERTEX_TANGENTS
#endif // NORMAL_PREPASS
@ -133,7 +133,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
out.previous_world_position = bevy_pbr::mesh_functions::mesh_position_local_to_world(
mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].previous_model,
bevy_pbr::mesh_functions::get_previous_model_matrix(vertex_no_morph.instance_index),
vec4<f32>(vertex.position, 1.0)
);
#endif // MOTION_VECTOR_PREPASS

View file

@ -18,11 +18,10 @@ use bevy_ecs::{
query::ROQueryItem,
system::{lifetimeless::*, SystemParamItem, SystemState},
};
use bevy_math::{Mat3A, Mat4, Vec2};
use bevy_math::{Affine3, Affine3A, Mat4, Vec2, Vec3Swizzles, Vec4};
use bevy_reflect::TypeUuid;
use bevy_render::{
globals::{GlobalsBuffer, GlobalsUniform},
gpu_component_array_buffer::GpuComponentArrayBufferPlugin,
mesh::{
skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
GpuBufferInfo, InnerMeshVertexBufferLayout, Mesh, MeshVertexBufferLayout,
@ -115,8 +114,6 @@ impl Plugin for MeshRenderPlugin {
load_internal_asset!(app, SKINNING_HANDLE, "skinning.wgsl", Shader::from_wgsl);
load_internal_asset!(app, MORPH_HANDLE, "morph.wgsl", Shader::from_wgsl);
app.add_plugins(GpuComponentArrayBufferPlugin::<MeshUniform>::default());
if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
render_app
.init_resource::<SkinnedMeshUniform>()
@ -129,6 +126,7 @@ impl Plugin for MeshRenderPlugin {
.add_systems(
Render,
(
prepare_mesh_uniforms.in_set(RenderSet::Prepare),
prepare_skinned_meshes.in_set(RenderSet::Prepare),
prepare_morphs.in_set(RenderSet::Prepare),
queue_mesh_bind_group.in_set(RenderSet::Queue),
@ -151,7 +149,11 @@ impl Plugin for MeshRenderPlugin {
));
}
render_app.init_resource::<MeshPipeline>();
render_app
.insert_resource(GpuArrayBuffer::<MeshUniform>::new(
render_app.world.resource::<RenderDevice>(),
))
.init_resource::<MeshPipeline>();
}
// Load the mesh_bindings shader module here as it depends on runtime information about
@ -166,14 +168,76 @@ impl Plugin for MeshRenderPlugin {
}
}
#[derive(Component, ShaderType, Clone)]
pub struct MeshUniform {
pub transform: Mat4,
pub previous_transform: Mat4,
pub inverse_transpose_model: Mat4,
/// Per-mesh transform data stored as a component on the mesh entity.
///
/// `extract_meshes` builds one of these per visible mesh, and
/// `MeshUniform::from` turns it into the packed layout that is pushed to the GPU.
#[derive(Component)]
pub struct MeshTransforms {
// Current model (local-to-world) transform of the mesh.
pub transform: Affine3,
// Model transform recorded as the previous one for this mesh.
pub previous_transform: Affine3,
// Bit field of mesh options; filled from `MeshFlags::bits()` during extraction.
pub flags: u32,
}
/// Packed per-mesh data in the layout read by the shader, built from
/// `MeshTransforms` via `From<&MeshTransforms>`.
#[derive(ShaderType, Clone)]
pub struct MeshUniform {
// Affine 4x3 matrices transposed to 3x4: each `Vec4` is one row of the
// 3x3 linear part with the matching translation component in `w`.
pub transform: [Vec4; 3],
pub previous_transform: [Vec4; 3],
// Inverse transpose of the model's 3x3 part, packed in mat2x4 and f32 as
// (where [n] is column n of the 3x3 matrix):
// [0].xyz, [1].x,
// [1].yz, [2].xy
// [2].z
pub inverse_transpose_model_a: [Vec4; 2],
pub inverse_transpose_model_b: f32,
// Bit field copied unchanged from `MeshTransforms::flags`.
pub flags: u32,
}
impl From<&MeshTransforms> for MeshUniform {
fn from(mesh_transforms: &MeshTransforms) -> Self {
let transpose_model_3x3 = mesh_transforms.transform.matrix3.transpose();
let transpose_previous_model_3x3 = mesh_transforms.previous_transform.matrix3.transpose();
let inverse_transpose_model_3x3 = Affine3A::from(&mesh_transforms.transform)
.inverse()
.matrix3
.transpose();
Self {
transform: [
transpose_model_3x3
.x_axis
.extend(mesh_transforms.transform.translation.x),
transpose_model_3x3
.y_axis
.extend(mesh_transforms.transform.translation.y),
transpose_model_3x3
.z_axis
.extend(mesh_transforms.transform.translation.z),
],
previous_transform: [
transpose_previous_model_3x3
.x_axis
.extend(mesh_transforms.previous_transform.translation.x),
transpose_previous_model_3x3
.y_axis
.extend(mesh_transforms.previous_transform.translation.y),
transpose_previous_model_3x3
.z_axis
.extend(mesh_transforms.previous_transform.translation.z),
],
inverse_transpose_model_a: [
(
inverse_transpose_model_3x3.x_axis,
inverse_transpose_model_3x3.y_axis.x,
)
.into(),
(
inverse_transpose_model_3x3.y_axis.yz(),
inverse_transpose_model_3x3.z_axis.xy(),
)
.into(),
],
inverse_transpose_model_b: inverse_transpose_model_3x3.z_axis.z,
flags: mesh_transforms.flags,
}
}
}
// NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_types.wgsl!
bitflags::bitflags! {
#[repr(transparent)]
@ -210,26 +274,25 @@ pub fn extract_meshes(
for (entity, _, transform, previous_transform, handle, not_receiver, not_caster) in
visible_meshes
{
let transform = transform.compute_matrix();
let transform = transform.affine();
let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform);
let mut flags = if not_receiver.is_some() {
MeshFlags::empty()
} else {
MeshFlags::SHADOW_RECEIVER
};
if Mat3A::from_mat4(transform).determinant().is_sign_positive() {
if transform.matrix3.determinant().is_sign_positive() {
flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3;
}
let uniform = MeshUniform {
let transforms = MeshTransforms {
transform: (&transform).into(),
previous_transform: (&previous_transform).into(),
flags: flags.bits(),
transform,
previous_transform,
inverse_transpose_model: transform.inverse().transpose(),
};
if not_caster.is_some() {
not_caster_commands.push((entity, (handle.clone_weak(), uniform, NotShadowCaster)));
not_caster_commands.push((entity, (handle.clone_weak(), transforms, NotShadowCaster)));
} else {
caster_commands.push((entity, (handle.clone_weak(), uniform)));
caster_commands.push((entity, (handle.clone_weak(), transforms)));
}
}
*prev_caster_commands_len = caster_commands.len();
@ -317,6 +380,29 @@ pub fn extract_skinned_meshes(
commands.insert_or_spawn_batch(values);
}
/// Rebuilds the `GpuArrayBuffer<MeshUniform>` from every entity that has
/// `MeshTransforms`, inserts the value returned by each `push` on the
/// corresponding entity, and writes the buffer out.
fn prepare_mesh_uniforms(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut gpu_array_buffer: ResMut<GpuArrayBuffer<MeshUniform>>,
    components: Query<(Entity, &MeshTransforms)>,
) {
    // Start from an empty buffer on every run of this system.
    gpu_array_buffer.clear();

    let mut indexed_entities = Vec::new();
    for (entity, mesh_transforms) in &components {
        let uniform = MeshUniform::from(mesh_transforms);
        indexed_entities.push((entity, gpu_array_buffer.push(uniform)));
    }
    commands.insert_or_spawn_batch(indexed_entities);

    gpu_array_buffer.write_buffer(&render_device, &render_queue);
}
#[derive(Resource, Clone)]
pub struct MeshPipeline {
pub view_layout: BindGroupLayout,

View file

@ -3,7 +3,7 @@
#import bevy_pbr::morph
#import bevy_pbr::mesh_bindings mesh
#import bevy_pbr::mesh_vertex_output MeshVertexOutput
#import bevy_render::instance_index
#import bevy_render::instance_index get_instance_index
struct Vertex {
@builtin(instance_index) instance_index: u32,
@ -66,8 +66,8 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput {
var model = bevy_pbr::skinning::skin_model(vertex.joint_indices, vertex.joint_weights);
#else
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
var model = mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].model;
// See https://github.com/gfx-rs/naga/issues/2416 .
var model = mesh_functions::get_model_matrix(vertex_no_morph.instance_index);
#endif
#ifdef VERTEX_NORMALS
@ -78,7 +78,7 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput {
vertex.normal,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif
#endif
@ -98,7 +98,7 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput {
vertex.tangent,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif
@ -109,7 +109,7 @@ fn vertex(vertex_no_morph: Vertex) -> MeshVertexOutput {
#ifdef VERTEX_OUTPUT_INSTANCE_INDEX
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
out.instance_index = bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index);
out.instance_index = get_instance_index(vertex_no_morph.instance_index);
#endif
return out;

View file

@ -3,6 +3,35 @@
#import bevy_pbr::mesh_view_bindings view
#import bevy_pbr::mesh_bindings mesh
#import bevy_pbr::mesh_types MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT
#import bevy_render::instance_index get_instance_index
// Expands an affine transform stored as a 3x4 matrix (three vec4 columns)
// into a full 4x4 matrix.
fn affine_to_square(affine: mat3x4<f32>) -> mat4x4<f32> {
    // Append the constant (0, 0, 0, 1) vector as a fourth column, then
    // transpose the result.
    let packed = mat4x4<f32>(
        affine[0],
        affine[1],
        affine[2],
        vec4<f32>(0.0, 0.0, 0.0, 1.0),
    );
    return transpose(packed);
}
// Rebuilds a 3x3 matrix from nine floats packed column after column into a
// mat2x4 (first eight values) and a trailing f32 (ninth value).
fn mat2x4_f32_to_mat3x3_unpack(
    a: mat2x4<f32>,
    b: f32,
) -> mat3x3<f32> {
    let column_0 = a[0].xyz;
    let column_1 = vec3<f32>(a[0].w, a[1].x, a[1].y);
    let column_2 = vec3<f32>(a[1].z, a[1].w, b);
    return mat3x3<f32>(column_0, column_1, column_2);
}
// Returns the 4x4 model matrix of the mesh selected by `instance_index`.
// The index is first mapped through `get_instance_index`.
fn get_model_matrix(instance_index: u32) -> mat4x4<f32> {
    let mesh_index = get_instance_index(instance_index);
    return affine_to_square(mesh[mesh_index].model);
}
// Returns the 4x4 previous model matrix of the mesh selected by
// `instance_index`. The index is first mapped through `get_instance_index`.
fn get_previous_model_matrix(instance_index: u32) -> mat4x4<f32> {
    let mesh_index = get_instance_index(instance_index);
    return affine_to_square(mesh[mesh_index].previous_model);
}
fn mesh_position_local_to_world(model: mat4x4<f32>, vertex_position: vec4<f32>) -> vec4<f32> {
return model * vertex_position;
@ -28,10 +57,9 @@ fn mesh_normal_local_to_world(vertex_normal: vec3<f32>, instance_index: u32) ->
// unless you really know what you are doing.
// http://www.mikktspace.com/
return normalize(
mat3x3<f32>(
mesh[instance_index].inverse_transpose_model[0].xyz,
mesh[instance_index].inverse_transpose_model[1].xyz,
mesh[instance_index].inverse_transpose_model[2].xyz
mat2x4_f32_to_mat3x3_unpack(
mesh[instance_index].inverse_transpose_model_a,
mesh[instance_index].inverse_transpose_model_b,
) * vertex_normal
);
}

View file

@ -1,9 +1,17 @@
#define_import_path bevy_pbr::mesh_types
struct Mesh {
model: mat4x4<f32>,
previous_model: mat4x4<f32>,
inverse_transpose_model: mat4x4<f32>,
// Affine 4x3 matrices transposed to 3x4
// Use bevy_pbr::mesh_functions::affine_to_square to unpack
model: mat3x4<f32>,
previous_model: mat3x4<f32>,
// 3x3 matrix packed in mat2x4 and f32 as:
// [0].xyz, [1].x,
// [1].yz, [2].xy
// [2].z
// Use bevy_pbr::mesh_functions::mat2x4_f32_to_mat3x3_unpack to unpack
inverse_transpose_model_a: mat2x4<f32>,
inverse_transpose_model_b: f32,
// 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options.
flags: u32,
};

View file

@ -1,6 +1,5 @@
#import bevy_pbr::mesh_bindings mesh
#import bevy_pbr::mesh_functions mesh_position_local_to_clip
#import bevy_render::instance_index
#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip
#ifdef SKINNED
#import bevy_pbr::skinning
@ -24,7 +23,7 @@ fn vertex(vertex: Vertex) -> VertexOutput {
#ifdef SKINNED
let model = bevy_pbr::skinning::skin_model(vertex.joint_indexes, vertex.joint_weights);
#else
let model = mesh[bevy_render::instance_index::get_instance_index(vertex.instance_index)].model;
let model = get_model_matrix(vertex.instance_index);
#endif
var out: VertexOutput;

View file

@ -1,5 +1,5 @@
use crate::MeshPipeline;
use crate::{DrawMesh, MeshPipelineKey, MeshUniform, SetMeshBindGroup, SetMeshViewBindGroup};
use crate::{MeshPipeline, MeshTransforms};
use bevy_app::Plugin;
use bevy_asset::{load_internal_asset, Handle, HandleUntyped};
use bevy_core_pipeline::core_3d::Opaque3d;
@ -121,14 +121,14 @@ fn queue_wireframes(
Query<(
Entity,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)>,
Query<
(
Entity,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
),
With<Wireframe>,
@ -142,10 +142,10 @@ fn queue_wireframes(
let rangefinder = view.rangefinder3d();
let view_key = msaa_key | MeshPipelineKey::from_hdr(view.hdr);
let add_render_phase = |(entity, mesh_handle, mesh_uniform, batch_indices): (
let add_render_phase = |(entity, mesh_handle, mesh_transforms, batch_indices): (
Entity,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)| {
if let Some(mesh) = render_meshes.get(mesh_handle) {
@ -164,7 +164,8 @@ fn queue_wireframes(
entity,
pipeline: pipeline_id,
draw_function: draw_custom,
distance: rangefinder.distance(&mesh_uniform.transform),
distance: rangefinder
.distance_translation(&mesh_transforms.transform.translation),
per_object_binding_dynamic_offset: batch_indices
.dynamic_offset
.unwrap_or_default(),

View file

@ -1,5 +1,5 @@
use bevy_ecs::{component::Component, prelude::Entity, reflect::ReflectComponent};
use bevy_math::{Mat4, Vec3, Vec3A, Vec4, Vec4Swizzles};
use bevy_math::{Affine3A, Mat3A, Mat4, Vec3, Vec3A, Vec4, Vec4Swizzles};
use bevy_reflect::Reflect;
use bevy_utils::HashMap;
@ -26,13 +26,13 @@ impl Aabb {
/// Calculate the relative radius of the AABB with respect to a plane
#[inline]
pub fn relative_radius(&self, p_normal: &Vec3A, axes: &[Vec3A]) -> f32 {
pub fn relative_radius(&self, p_normal: &Vec3A, model: &Mat3A) -> f32 {
// NOTE: dot products on Vec3A use SIMD and even with the overhead of conversion are net faster than Vec3
let half_extents = self.half_extents;
Vec3A::new(
p_normal.dot(axes[0]),
p_normal.dot(axes[1]),
p_normal.dot(axes[2]),
p_normal.dot(model.x_axis),
p_normal.dot(model.y_axis),
p_normal.dot(model.z_axis),
)
.abs()
.dot(half_extents)
@ -67,16 +67,11 @@ pub struct Sphere {
impl Sphere {
#[inline]
pub fn intersects_obb(&self, aabb: &Aabb, local_to_world: &Mat4) -> bool {
let aabb_center_world = *local_to_world * aabb.center.extend(1.0);
let axes = [
Vec3A::from(local_to_world.x_axis),
Vec3A::from(local_to_world.y_axis),
Vec3A::from(local_to_world.z_axis),
];
let v = Vec3A::from(aabb_center_world) - self.center;
pub fn intersects_obb(&self, aabb: &Aabb, local_to_world: &Affine3A) -> bool {
let aabb_center_world = local_to_world.transform_point3a(aabb.center);
let v = aabb_center_world - self.center;
let d = v.length();
let relative_radius = aabb.relative_radius(&(v / d), &axes);
let relative_radius = aabb.relative_radius(&(v / d), &local_to_world.matrix3);
d < self.radius + relative_radius
}
}
@ -195,17 +190,11 @@ impl Frustum {
pub fn intersects_obb(
&self,
aabb: &Aabb,
model_to_world: &Mat4,
model_to_world: &Affine3A,
intersect_near: bool,
intersect_far: bool,
) -> bool {
let aabb_center_world = model_to_world.transform_point3a(aabb.center).extend(1.0);
let axes = [
Vec3A::from(model_to_world.x_axis),
Vec3A::from(model_to_world.y_axis),
Vec3A::from(model_to_world.z_axis),
];
for (idx, half_space) in self.half_spaces.into_iter().enumerate() {
if idx == 4 && !intersect_near {
continue;
@ -214,7 +203,7 @@ impl Frustum {
continue;
}
let p_normal = half_space.normal();
let relative_radius = aabb.relative_radius(&p_normal, &axes);
let relative_radius = aabb.relative_radius(&p_normal, &model_to_world.matrix3);
if half_space.normal_d().dot(aabb_center_world) + relative_radius <= 0.0 {
return false;
}

View file

@ -1,4 +1,4 @@
use bevy_math::{Mat4, Vec4};
use bevy_math::{Mat4, Vec3, Vec4};
/// A distance calculator for the draw order of [`PhaseItem`](crate::render_phase::PhaseItem)s.
pub struct ViewRangefinder3d {
@ -15,6 +15,14 @@ impl ViewRangefinder3d {
}
}
/// Computes the distance, i.e. the view-space `Z` value, of the point given by `translation`.
#[inline]
pub fn distance_translation(&self, translation: &Vec3) -> f32 {
    // NOTE: the translation is promoted to a homogeneous point (w = 1) so that the dot
    // product with row 2 of the inverse view matrix yields the view-space z component
    let homogeneous_position = translation.extend(1.0);
    self.inverse_view_row_2.dot(homogeneous_position)
}
/// Calculates the distance, or view-space `Z` value, for the given `transform`.
#[inline]
pub fn distance(&self, transform: &Mat4) -> f32 {

View file

@ -392,7 +392,7 @@ pub fn check_visibility(
// If we have an aabb and transform, do frustum culling
if maybe_no_frustum_culling.is_none() {
let model = transform.compute_matrix();
let model = transform.affine();
let model_sphere = Sphere {
center: model.transform_point3a(model_aabb.center),
radius: transform.radius_vec3a(model_aabb.half_extents),

View file

@ -6,7 +6,7 @@ use bevy::{
query::QueryItem,
system::{lifetimeless::*, SystemParamItem},
},
pbr::{MeshPipeline, MeshPipelineKey, MeshUniform, SetMeshBindGroup, SetMeshViewBindGroup},
pbr::{MeshPipeline, MeshPipelineKey, MeshTransforms, SetMeshBindGroup, SetMeshViewBindGroup},
prelude::*,
render::{
extract_component::{ExtractComponent, ExtractComponentPlugin},
@ -113,7 +113,7 @@ fn queue_custom(
mut pipelines: ResMut<SpecializedMeshPipelines<CustomPipeline>>,
pipeline_cache: Res<PipelineCache>,
meshes: Res<RenderAssets<Mesh>>,
material_meshes: Query<(Entity, &MeshUniform, &Handle<Mesh>), With<InstanceMaterialData>>,
material_meshes: Query<(Entity, &MeshTransforms, &Handle<Mesh>), With<InstanceMaterialData>>,
mut views: Query<(&ExtractedView, &mut RenderPhase<Transparent3d>)>,
) {
let draw_custom = transparent_3d_draw_functions.read().id::<DrawCustom>();
@ -123,7 +123,7 @@ fn queue_custom(
for (view, mut transparent_phase) in &mut views {
let view_key = msaa_key | MeshPipelineKey::from_hdr(view.hdr);
let rangefinder = view.rangefinder3d();
for (entity, mesh_uniform, mesh_handle) in &material_meshes {
for (entity, mesh_transforms, mesh_handle) in &material_meshes {
if let Some(mesh) = meshes.get(mesh_handle) {
let key =
view_key | MeshPipelineKey::from_primitive_topology(mesh.primitive_topology);
@ -134,7 +134,8 @@ fn queue_custom(
entity,
pipeline,
draw_function: draw_custom,
distance: rangefinder.distance(&mesh_uniform.transform),
distance: rangefinder
.distance_translation(&mesh_transforms.transform.translation),
});
}
}