Fix the example regressions from packed growable buffers. (#14375)

The "uberbuffers" PR #14257 caused some examples to fail intermittently
for different reasons:

1. `morph_targets` could fail because vertex displacements for morph
targets are keyed off the vertex index. With buffer packing, the vertex
index can vary based on the position in the buffer, which caused the
morph targets to be potentially incorrect. The solution is to include
the first vertex index with the `MeshUniform` (and `MeshInputUniform` if
GPU preprocessing is in use), so that the shader can calculate the true
vertex index before performing the morph operation. This results in
wasted space in `MeshUniform`, which is unfortunate, but we'll soon be
filling in the padding with the ID of the material when bindless
textures land, so this had to happen sooner or later anyhow.

Including the vertex index in the `MeshInputUniform` caused an ordering
problem. The `MeshInputUniform` was created during the extraction phase,
before the allocations occurred, so the extraction logic didn't know
where the mesh vertex data was going to end up. The solution is to move
the `MeshInputUniform` creation (the `collect_meshes_for_gpu_building`
system) to after the allocations phase. This should be better for
parallelism anyhow, because it allows the extraction phase to finish
quicker. It's also something we'll have to do for bindless in any event.

2. The `lines` and `fog_volumes` examples could fail because their
custom drawing nodes weren't updated to supply the vertex and index
offsets in their `draw_indexed` and `draw` calls. This commit fixes this
oversight.

Fixes #14366.
This commit is contained in:
Patrick Walton 2024-07-22 12:55:51 -06:00 committed by GitHub
parent d30391b583
commit d235d41af1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 124 additions and 38 deletions

View file

@ -138,7 +138,7 @@ pub fn extract_meshlet_meshes(
gpu_scene gpu_scene
.instance_uniforms .instance_uniforms
.get_mut() .get_mut()
.push(MeshUniform::new(&transforms, None)); .push(MeshUniform::new(&transforms, 0, None));
} }
} }

View file

@ -1,5 +1,6 @@
#import bevy_pbr::{ #import bevy_pbr::{
prepass_bindings, prepass_bindings,
mesh_bindings::mesh,
mesh_functions, mesh_functions,
prepass_io::{Vertex, VertexOutput, FragmentOutput}, prepass_io::{Vertex, VertexOutput, FragmentOutput},
skinning, skinning,
@ -15,18 +16,21 @@
#ifdef MORPH_TARGETS #ifdef MORPH_TARGETS
fn morph_vertex(vertex_in: Vertex) -> Vertex { fn morph_vertex(vertex_in: Vertex) -> Vertex {
var vertex = vertex_in; var vertex = vertex_in;
let first_vertex = mesh[vertex.instance_index].first_vertex_index;
let vertex_index = vertex.index - first_vertex;
let weight_count = morph::layer_count(); let weight_count = morph::layer_count();
for (var i: u32 = 0u; i < weight_count; i ++) { for (var i: u32 = 0u; i < weight_count; i ++) {
let weight = morph::weight_at(i); let weight = morph::weight_at(i);
if weight == 0.0 { if weight == 0.0 {
continue; continue;
} }
vertex.position += weight * morph::morph(vertex.index, morph::position_offset, i); vertex.position += weight * morph::morph(vertex_index, morph::position_offset, i);
#ifdef VERTEX_NORMALS #ifdef VERTEX_NORMALS
vertex.normal += weight * morph::morph(vertex.index, morph::normal_offset, i); vertex.normal += weight * morph::morph(vertex_index, morph::normal_offset, i);
#endif #endif
#ifdef VERTEX_TANGENTS #ifdef VERTEX_TANGENTS
vertex.tangent += vec4(weight * morph::morph(vertex.index, morph::tangent_offset, i), 0.0); vertex.tangent += vec4(weight * morph::morph(vertex_index, morph::tangent_offset, i), 0.0);
#endif #endif
} }
return vertex; return vertex;

View file

@ -186,8 +186,8 @@ impl Plugin for MeshRenderPlugin {
if use_gpu_instance_buffer_builder { if use_gpu_instance_buffer_builder {
render_app render_app
.init_resource::<gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>( .init_resource::<gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>()
) .init_resource::<RenderMeshInstanceGpuQueues>()
.add_systems( .add_systems(
ExtractSchedule, ExtractSchedule,
extract_meshes_for_gpu_building.in_set(ExtractMeshesSet), extract_meshes_for_gpu_building.in_set(ExtractMeshesSet),
@ -200,6 +200,9 @@ impl Plugin for MeshRenderPlugin {
gpu_preprocessing::delete_old_work_item_buffers::<MeshPipeline> gpu_preprocessing::delete_old_work_item_buffers::<MeshPipeline>
.in_set(RenderSet::ManageViews) .in_set(RenderSet::ManageViews)
.after(prepare_view_targets), .after(prepare_view_targets),
collect_meshes_for_gpu_building
.in_set(RenderSet::PrepareAssets)
.after(allocator::allocate_and_free_meshes),
), ),
); );
} else { } else {
@ -275,6 +278,18 @@ pub struct MeshUniform {
// //
// (MSB: most significant bit; LSB: least significant bit.) // (MSB: most significant bit; LSB: least significant bit.)
pub lightmap_uv_rect: UVec2, pub lightmap_uv_rect: UVec2,
/// The index of this mesh's first vertex in the vertex buffer.
///
/// Multiple meshes can be packed into a single vertex buffer (see
/// [`MeshAllocator`]). This value stores the offset of the first vertex in
/// this mesh in that buffer.
pub first_vertex_index: u32,
/// Padding.
pub pad_a: u32,
/// Padding.
pub pad_b: u32,
/// Padding.
pub pad_c: u32,
} }
/// Information that has to be transferred from CPU to GPU in order to produce /// Information that has to be transferred from CPU to GPU in order to produce
@ -305,6 +320,18 @@ pub struct MeshInputUniform {
/// ///
/// This is used for TAA. If not present, this will be `u32::MAX`. /// This is used for TAA. If not present, this will be `u32::MAX`.
pub previous_input_index: u32, pub previous_input_index: u32,
/// The index of this mesh's first vertex in the vertex buffer.
///
/// Multiple meshes can be packed into a single vertex buffer (see
/// [`MeshAllocator`]). This value stores the offset of the first vertex in
/// this mesh in that buffer.
pub first_vertex_index: u32,
/// Padding.
pub pad_a: u32,
/// Padding.
pub pad_b: u32,
/// Padding.
pub pad_c: u32,
} }
/// Information about each mesh instance needed to cull it on GPU. /// Information about each mesh instance needed to cull it on GPU.
@ -331,7 +358,11 @@ pub struct MeshCullingData {
pub struct MeshCullingDataBuffer(RawBufferVec<MeshCullingData>); pub struct MeshCullingDataBuffer(RawBufferVec<MeshCullingData>);
impl MeshUniform { impl MeshUniform {
pub fn new(mesh_transforms: &MeshTransforms, maybe_lightmap_uv_rect: Option<Rect>) -> Self { pub fn new(
mesh_transforms: &MeshTransforms,
first_vertex_index: u32,
maybe_lightmap_uv_rect: Option<Rect>,
) -> Self {
let (local_from_world_transpose_a, local_from_world_transpose_b) = let (local_from_world_transpose_a, local_from_world_transpose_b) =
mesh_transforms.world_from_local.inverse_transpose_3x3(); mesh_transforms.world_from_local.inverse_transpose_3x3();
Self { Self {
@ -341,6 +372,10 @@ impl MeshUniform {
local_from_world_transpose_a, local_from_world_transpose_a,
local_from_world_transpose_b, local_from_world_transpose_b,
flags: mesh_transforms.flags, flags: mesh_transforms.flags,
first_vertex_index,
pad_a: 0,
pad_b: 0,
pad_c: 0,
} }
} }
} }
@ -515,6 +550,14 @@ pub enum RenderMeshInstanceGpuQueue {
GpuCulling(Vec<(Entity, RenderMeshInstanceGpuBuilder, MeshCullingData)>), GpuCulling(Vec<(Entity, RenderMeshInstanceGpuBuilder, MeshCullingData)>),
} }
/// The per-thread queues containing mesh instances, populated during the
/// extract phase.
///
/// These are filled in [`extract_meshes_for_gpu_building`] and consumed in
/// [`collect_meshes_for_gpu_building`].
#[derive(Resource, Default, Deref, DerefMut)]
pub struct RenderMeshInstanceGpuQueues(Parallel<RenderMeshInstanceGpuQueue>);
impl RenderMeshInstanceShared { impl RenderMeshInstanceShared {
fn from_components( fn from_components(
previous_transform: Option<&PreviousGlobalTransform>, previous_transform: Option<&PreviousGlobalTransform>,
@ -719,7 +762,14 @@ impl RenderMeshInstanceGpuBuilder {
entity: Entity, entity: Entity,
render_mesh_instances: &mut EntityHashMap<RenderMeshInstanceGpu>, render_mesh_instances: &mut EntityHashMap<RenderMeshInstanceGpu>,
current_input_buffer: &mut RawBufferVec<MeshInputUniform>, current_input_buffer: &mut RawBufferVec<MeshInputUniform>,
mesh_allocator: &MeshAllocator,
) -> usize { ) -> usize {
let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id)
{
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
None => 0,
};
// Push the mesh input uniform. // Push the mesh input uniform.
let current_uniform_index = current_input_buffer.push(MeshInputUniform { let current_uniform_index = current_input_buffer.push(MeshInputUniform {
world_from_local: self.world_from_local.to_transpose(), world_from_local: self.world_from_local.to_transpose(),
@ -729,6 +779,10 @@ impl RenderMeshInstanceGpuBuilder {
Some(previous_input_index) => previous_input_index.into(), Some(previous_input_index) => previous_input_index.into(),
None => u32::MAX, None => u32::MAX,
}, },
first_vertex_index,
pad_a: 0,
pad_b: 0,
pad_c: 0,
}); });
// Record the [`RenderMeshInstance`]. // Record the [`RenderMeshInstance`].
@ -900,11 +954,7 @@ pub fn extract_meshes_for_cpu_building(
pub fn extract_meshes_for_gpu_building( pub fn extract_meshes_for_gpu_building(
mut render_mesh_instances: ResMut<RenderMeshInstances>, mut render_mesh_instances: ResMut<RenderMeshInstances>,
render_visibility_ranges: Res<RenderVisibilityRanges>, render_visibility_ranges: Res<RenderVisibilityRanges>,
mut batched_instance_buffers: ResMut< mut render_mesh_instance_queues: ResMut<RenderMeshInstanceGpuQueues>,
gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>,
>,
mut mesh_culling_data_buffer: ResMut<MeshCullingDataBuffer>,
mut render_mesh_instance_queues: Local<Parallel<RenderMeshInstanceGpuQueue>>,
meshes_query: Extract< meshes_query: Extract<
Query<( Query<(
Entity, Entity,
@ -1004,13 +1054,6 @@ pub fn extract_meshes_for_gpu_building(
queue.push(entity, gpu_mesh_instance_builder, gpu_mesh_culling_data); queue.push(entity, gpu_mesh_instance_builder, gpu_mesh_culling_data);
}, },
); );
collect_meshes_for_gpu_building(
render_mesh_instances,
&mut batched_instance_buffers,
&mut mesh_culling_data_buffer,
&mut render_mesh_instance_queues,
);
} }
/// A system that sets the [`RenderMeshInstanceFlags`] for each mesh based on /// A system that sets the [`RenderMeshInstanceFlags`] for each mesh based on
@ -1044,22 +1087,28 @@ fn set_mesh_motion_vector_flags(
/// Creates the [`RenderMeshInstanceGpu`]s and [`MeshInputUniform`]s when GPU /// Creates the [`RenderMeshInstanceGpu`]s and [`MeshInputUniform`]s when GPU
/// mesh uniforms are built. /// mesh uniforms are built.
fn collect_meshes_for_gpu_building( pub fn collect_meshes_for_gpu_building(
render_mesh_instances: &mut RenderMeshInstancesGpu, render_mesh_instances: ResMut<RenderMeshInstances>,
batched_instance_buffers: &mut gpu_preprocessing::BatchedInstanceBuffers< batched_instance_buffers: ResMut<
MeshUniform, gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>,
MeshInputUniform,
>, >,
mesh_culling_data_buffer: &mut MeshCullingDataBuffer, mut mesh_culling_data_buffer: ResMut<MeshCullingDataBuffer>,
render_mesh_instance_queues: &mut Parallel<RenderMeshInstanceGpuQueue>, mut render_mesh_instance_queues: ResMut<RenderMeshInstanceGpuQueues>,
mesh_allocator: Res<MeshAllocator>,
) { ) {
let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) =
render_mesh_instances.into_inner()
else {
return;
};
// Collect render mesh instances. Build up the uniform buffer. // Collect render mesh instances. Build up the uniform buffer.
let gpu_preprocessing::BatchedInstanceBuffers { let gpu_preprocessing::BatchedInstanceBuffers {
ref mut current_input_buffer, ref mut current_input_buffer,
ref mut previous_input_buffer, ref mut previous_input_buffer,
.. ..
} = batched_instance_buffers; } = batched_instance_buffers.into_inner();
// Swap buffers. // Swap buffers.
mem::swap(current_input_buffer, previous_input_buffer); mem::swap(current_input_buffer, previous_input_buffer);
@ -1076,8 +1125,9 @@ fn collect_meshes_for_gpu_building(
for (entity, mesh_instance_builder) in queue.drain(..) { for (entity, mesh_instance_builder) in queue.drain(..) {
mesh_instance_builder.add_to( mesh_instance_builder.add_to(
entity, entity,
render_mesh_instances, &mut *render_mesh_instances,
current_input_buffer, current_input_buffer,
&mesh_allocator,
); );
} }
} }
@ -1085,10 +1135,12 @@ fn collect_meshes_for_gpu_building(
for (entity, mesh_instance_builder, mesh_culling_builder) in queue.drain(..) { for (entity, mesh_instance_builder, mesh_culling_builder) in queue.drain(..) {
let instance_data_index = mesh_instance_builder.add_to( let instance_data_index = mesh_instance_builder.add_to(
entity, entity,
render_mesh_instances, &mut *render_mesh_instances,
current_input_buffer, current_input_buffer,
&mesh_allocator,
); );
let culling_data_index = mesh_culling_builder.add_to(mesh_culling_data_buffer); let culling_data_index =
mesh_culling_builder.add_to(&mut mesh_culling_data_buffer);
debug_assert_eq!(instance_data_index, culling_data_index); debug_assert_eq!(instance_data_index, culling_data_index);
} }
} }
@ -1220,7 +1272,7 @@ impl GetBatchData for MeshPipeline {
type BufferData = MeshUniform; type BufferData = MeshUniform;
fn get_batch_data( fn get_batch_data(
(mesh_instances, lightmaps, _, _): &SystemParamItem<Self::Param>, (mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem<Self::Param>,
entity: Entity, entity: Entity,
) -> Option<(Self::BufferData, Option<Self::CompareData>)> { ) -> Option<(Self::BufferData, Option<Self::CompareData>)> {
let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
@ -1231,11 +1283,17 @@ impl GetBatchData for MeshPipeline {
return None; return None;
}; };
let mesh_instance = mesh_instances.get(&entity)?; let mesh_instance = mesh_instances.get(&entity)?;
let first_vertex_index =
match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) {
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
None => 0,
};
let maybe_lightmap = lightmaps.render_lightmaps.get(&entity); let maybe_lightmap = lightmaps.render_lightmaps.get(&entity);
Some(( Some((
MeshUniform::new( MeshUniform::new(
&mesh_instance.transforms, &mesh_instance.transforms,
first_vertex_index,
maybe_lightmap.map(|lightmap| lightmap.uv_rect), maybe_lightmap.map(|lightmap| lightmap.uv_rect),
), ),
mesh_instance.should_batch().then_some(( mesh_instance.should_batch().then_some((
@ -1277,7 +1335,7 @@ impl GetFullBatchData for MeshPipeline {
} }
fn get_binned_batch_data( fn get_binned_batch_data(
(mesh_instances, lightmaps, _, _): &SystemParamItem<Self::Param>, (mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem<Self::Param>,
entity: Entity, entity: Entity,
) -> Option<Self::BufferData> { ) -> Option<Self::BufferData> {
let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
@ -1287,10 +1345,16 @@ impl GetFullBatchData for MeshPipeline {
return None; return None;
}; };
let mesh_instance = mesh_instances.get(&entity)?; let mesh_instance = mesh_instances.get(&entity)?;
let first_vertex_index =
match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) {
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
None => 0,
};
let maybe_lightmap = lightmaps.render_lightmaps.get(&entity); let maybe_lightmap = lightmaps.render_lightmaps.get(&entity);
Some(MeshUniform::new( Some(MeshUniform::new(
&mesh_instance.transforms, &mesh_instance.transforms,
first_vertex_index,
maybe_lightmap.map(|lightmap| lightmap.uv_rect), maybe_lightmap.map(|lightmap| lightmap.uv_rect),
)) ))
} }
@ -2354,7 +2418,7 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
} }
RenderMeshBufferInfo::NonIndexed => match indirect_parameters { RenderMeshBufferInfo::NonIndexed => match indirect_parameters {
None => { None => {
pass.draw(0..gpu_mesh.vertex_count, batch_range.clone()); pass.draw(vertex_buffer_slice.range, batch_range.clone());
} }
Some((indirect_parameters_offset, indirect_parameters_buffer)) => { Some((indirect_parameters_offset, indirect_parameters_buffer)) => {
pass.draw_indirect(indirect_parameters_buffer, indirect_parameters_offset); pass.draw_indirect(indirect_parameters_buffer, indirect_parameters_offset);

View file

@ -1,4 +1,5 @@
#import bevy_pbr::{ #import bevy_pbr::{
mesh_bindings::mesh,
mesh_functions, mesh_functions,
skinning, skinning,
morph::morph, morph::morph,
@ -9,18 +10,21 @@
#ifdef MORPH_TARGETS #ifdef MORPH_TARGETS
fn morph_vertex(vertex_in: Vertex) -> Vertex { fn morph_vertex(vertex_in: Vertex) -> Vertex {
var vertex = vertex_in; var vertex = vertex_in;
let first_vertex = mesh[vertex.instance_index].first_vertex_index;
let vertex_index = vertex.index - first_vertex;
let weight_count = bevy_pbr::morph::layer_count(); let weight_count = bevy_pbr::morph::layer_count();
for (var i: u32 = 0u; i < weight_count; i ++) { for (var i: u32 = 0u; i < weight_count; i ++) {
let weight = bevy_pbr::morph::weight_at(i); let weight = bevy_pbr::morph::weight_at(i);
if weight == 0.0 { if weight == 0.0 {
continue; continue;
} }
vertex.position += weight * morph(vertex.index, bevy_pbr::morph::position_offset, i); vertex.position += weight * morph(vertex_index, bevy_pbr::morph::position_offset, i);
#ifdef VERTEX_NORMALS #ifdef VERTEX_NORMALS
vertex.normal += weight * morph(vertex.index, bevy_pbr::morph::normal_offset, i); vertex.normal += weight * morph(vertex_index, bevy_pbr::morph::normal_offset, i);
#endif #endif
#ifdef VERTEX_TANGENTS #ifdef VERTEX_TANGENTS
vertex.tangent += vec4(weight * morph(vertex.index, bevy_pbr::morph::tangent_offset, i), 0.0); vertex.tangent += vec4(weight * morph(vertex_index, bevy_pbr::morph::tangent_offset, i), 0.0);
#endif #endif
} }
return vertex; return vertex;

View file

@ -22,6 +22,10 @@ struct MeshInput {
// The index of this mesh's `MeshInput` in the `previous_input` array, if // The index of this mesh's `MeshInput` in the `previous_input` array, if
// applicable. If not present, this is `u32::MAX`. // applicable. If not present, this is `u32::MAX`.
previous_input_index: u32, previous_input_index: u32,
first_vertex_index: u32,
pad_a: u32,
pad_b: u32,
pad_c: u32,
} }
// Information about each mesh instance needed to cull it on GPU. // Information about each mesh instance needed to cull it on GPU.
@ -186,4 +190,5 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
output[mesh_output_index].local_from_world_transpose_b = local_from_world_transpose_b; output[mesh_output_index].local_from_world_transpose_b = local_from_world_transpose_b;
output[mesh_output_index].flags = current_input[input_index].flags; output[mesh_output_index].flags = current_input[input_index].flags;
output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect;
output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index;
} }

View file

@ -15,6 +15,11 @@ struct Mesh {
// 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options.
flags: u32, flags: u32,
lightmap_uv_rect: vec2<u32>, lightmap_uv_rect: vec2<u32>,
// The index of the mesh's first vertex in the vertex buffer.
first_vertex_index: u32,
pad_a: u32,
pad_b: u32,
pad_c: u32,
}; };
#ifdef SKINNED #ifdef SKINNED

View file

@ -472,10 +472,14 @@ impl ViewNode for VolumetricFogNode {
render_pass render_pass
.set_index_buffer(*index_buffer_slice.buffer.slice(..), *index_format); .set_index_buffer(*index_buffer_slice.buffer.slice(..), *index_format);
render_pass.draw_indexed(0..*count, 0, 0..1); render_pass.draw_indexed(
index_buffer_slice.range.start..(index_buffer_slice.range.start + count),
vertex_buffer_slice.range.start as i32,
0..1,
);
} }
RenderMeshBufferInfo::NonIndexed => { RenderMeshBufferInfo::NonIndexed => {
render_pass.draw(0..render_mesh.vertex_count, 0..1); render_pass.draw(vertex_buffer_slice.range, 0..1);
} }
} }
} }