diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index f13b44808d..1d4bf7ffe6 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -138,7 +138,7 @@ pub fn extract_meshlet_meshes( gpu_scene .instance_uniforms .get_mut() - .push(MeshUniform::new(&transforms, None)); + .push(MeshUniform::new(&transforms, 0, None)); } } diff --git a/crates/bevy_pbr/src/prepass/prepass.wgsl b/crates/bevy_pbr/src/prepass/prepass.wgsl index cb1328fae1..7a0fcb89ed 100644 --- a/crates/bevy_pbr/src/prepass/prepass.wgsl +++ b/crates/bevy_pbr/src/prepass/prepass.wgsl @@ -1,5 +1,6 @@ #import bevy_pbr::{ prepass_bindings, + mesh_bindings::mesh, mesh_functions, prepass_io::{Vertex, VertexOutput, FragmentOutput}, skinning, @@ -15,18 +16,21 @@ #ifdef MORPH_TARGETS fn morph_vertex(vertex_in: Vertex) -> Vertex { var vertex = vertex_in; + let first_vertex = mesh[vertex.instance_index].first_vertex_index; + let vertex_index = vertex.index - first_vertex; + let weight_count = morph::layer_count(); for (var i: u32 = 0u; i < weight_count; i ++) { let weight = morph::weight_at(i); if weight == 0.0 { continue; } - vertex.position += weight * morph::morph(vertex.index, morph::position_offset, i); + vertex.position += weight * morph::morph(vertex_index, morph::position_offset, i); #ifdef VERTEX_NORMALS - vertex.normal += weight * morph::morph(vertex.index, morph::normal_offset, i); + vertex.normal += weight * morph::morph(vertex_index, morph::normal_offset, i); #endif #ifdef VERTEX_TANGENTS - vertex.tangent += vec4(weight * morph::morph(vertex.index, morph::tangent_offset, i), 0.0); + vertex.tangent += vec4(weight * morph::morph(vertex_index, morph::tangent_offset, i), 0.0); #endif } return vertex; diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 56f1cd1289..fcc9b6ba78 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -186,8 +186,8 @@ impl Plugin for MeshRenderPlugin { if use_gpu_instance_buffer_builder { render_app - .init_resource::>( - ) + .init_resource::>() + .init_resource::() .add_systems( ExtractSchedule, extract_meshes_for_gpu_building.in_set(ExtractMeshesSet), @@ -200,6 +200,9 @@ impl Plugin for MeshRenderPlugin { gpu_preprocessing::delete_old_work_item_buffers:: .in_set(RenderSet::ManageViews) .after(prepare_view_targets), + collect_meshes_for_gpu_building + .in_set(RenderSet::PrepareAssets) + .after(allocator::allocate_and_free_meshes), ), ); } else { @@ -275,6 +278,18 @@ pub struct MeshUniform { // // (MSB: most significant bit; LSB: least significant bit.) pub lightmap_uv_rect: UVec2, + /// The index of this mesh's first vertex in the vertex buffer. + /// + /// Multiple meshes can be packed into a single vertex buffer (see + /// [`MeshAllocator`]). This value stores the offset of the first vertex in + /// this mesh in that buffer. + pub first_vertex_index: u32, + /// Padding. + pub pad_a: u32, + /// Padding. + pub pad_b: u32, + /// Padding. + pub pad_c: u32, } /// Information that has to be transferred from CPU to GPU in order to produce @@ -305,6 +320,18 @@ pub struct MeshInputUniform { /// /// This is used for TAA. If not present, this will be `u32::MAX`. pub previous_input_index: u32, + /// The index of this mesh's first vertex in the vertex buffer. + /// + /// Multiple meshes can be packed into a single vertex buffer (see + /// [`MeshAllocator`]). This value stores the offset of the first vertex in + /// this mesh in that buffer. + pub first_vertex_index: u32, + /// Padding. + pub pad_a: u32, + /// Padding. + pub pad_b: u32, + /// Padding. + pub pad_c: u32, } /// Information about each mesh instance needed to cull it on GPU. @@ -331,7 +358,11 @@ pub struct MeshCullingData { pub struct MeshCullingDataBuffer(RawBufferVec); impl MeshUniform { - pub fn new(mesh_transforms: &MeshTransforms, maybe_lightmap_uv_rect: Option) -> Self { + pub fn new( + mesh_transforms: &MeshTransforms, + first_vertex_index: u32, + maybe_lightmap_uv_rect: Option, + ) -> Self { let (local_from_world_transpose_a, local_from_world_transpose_b) = mesh_transforms.world_from_local.inverse_transpose_3x3(); Self { @@ -341,6 +372,10 @@ impl MeshUniform { local_from_world_transpose_a, local_from_world_transpose_b, flags: mesh_transforms.flags, + first_vertex_index, + pad_a: 0, + pad_b: 0, + pad_c: 0, } } } @@ -515,6 +550,14 @@ pub enum RenderMeshInstanceGpuQueue { GpuCulling(Vec<(Entity, RenderMeshInstanceGpuBuilder, MeshCullingData)>), } +/// The per-thread queues containing mesh instances, populated during the +/// extract phase. +/// +/// These are filled in [`extract_meshes_for_gpu_building`] and consumed in +/// [`collect_meshes_for_gpu_building`]. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct RenderMeshInstanceGpuQueues(Parallel); + impl RenderMeshInstanceShared { fn from_components( previous_transform: Option<&PreviousGlobalTransform>, @@ -719,7 +762,14 @@ impl RenderMeshInstanceGpuBuilder { entity: Entity, render_mesh_instances: &mut EntityHashMap, current_input_buffer: &mut RawBufferVec, + mesh_allocator: &MeshAllocator, ) -> usize { + let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) + { + Some(mesh_vertex_slice) => mesh_vertex_slice.range.start, + None => 0, + }; + // Push the mesh input uniform. let current_uniform_index = current_input_buffer.push(MeshInputUniform { world_from_local: self.world_from_local.to_transpose(), @@ -729,6 +779,10 @@ impl RenderMeshInstanceGpuBuilder { Some(previous_input_index) => previous_input_index.into(), None => u32::MAX, }, + first_vertex_index, + pad_a: 0, + pad_b: 0, + pad_c: 0, }); // Record the [`RenderMeshInstance`]. @@ -900,11 +954,7 @@ pub fn extract_meshes_for_cpu_building( pub fn extract_meshes_for_gpu_building( mut render_mesh_instances: ResMut, render_visibility_ranges: Res, - mut batched_instance_buffers: ResMut< - gpu_preprocessing::BatchedInstanceBuffers, - >, - mut mesh_culling_data_buffer: ResMut, - mut render_mesh_instance_queues: Local>, + mut render_mesh_instance_queues: ResMut, meshes_query: Extract< Query<( Entity, @@ -1004,13 +1054,6 @@ pub fn extract_meshes_for_gpu_building( queue.push(entity, gpu_mesh_instance_builder, gpu_mesh_culling_data); }, ); - - collect_meshes_for_gpu_building( - render_mesh_instances, - &mut batched_instance_buffers, - &mut mesh_culling_data_buffer, - &mut render_mesh_instance_queues, - ); } /// A system that sets the [`RenderMeshInstanceFlags`] for each mesh based on @@ -1044,22 +1087,28 @@ fn set_mesh_motion_vector_flags( /// Creates the [`RenderMeshInstanceGpu`]s and [`MeshInputUniform`]s when GPU /// mesh uniforms are built. -fn collect_meshes_for_gpu_building( - render_mesh_instances: &mut RenderMeshInstancesGpu, - batched_instance_buffers: &mut gpu_preprocessing::BatchedInstanceBuffers< - MeshUniform, - MeshInputUniform, +pub fn collect_meshes_for_gpu_building( + render_mesh_instances: ResMut, + batched_instance_buffers: ResMut< + gpu_preprocessing::BatchedInstanceBuffers, >, - mesh_culling_data_buffer: &mut MeshCullingDataBuffer, - render_mesh_instance_queues: &mut Parallel, + mut mesh_culling_data_buffer: ResMut, + mut render_mesh_instance_queues: ResMut, + mesh_allocator: Res, ) { + let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = + render_mesh_instances.into_inner() + else { + return; + }; + // Collect render mesh instances. Build up the uniform buffer. let gpu_preprocessing::BatchedInstanceBuffers { ref mut current_input_buffer, ref mut previous_input_buffer, .. - } = batched_instance_buffers; + } = batched_instance_buffers.into_inner(); // Swap buffers. mem::swap(current_input_buffer, previous_input_buffer); @@ -1076,8 +1125,9 @@ fn collect_meshes_for_gpu_building( for (entity, mesh_instance_builder) in queue.drain(..) { mesh_instance_builder.add_to( entity, - render_mesh_instances, + &mut *render_mesh_instances, current_input_buffer, + &mesh_allocator, ); } } @@ -1085,10 +1135,12 @@ fn collect_meshes_for_gpu_building( for (entity, mesh_instance_builder, mesh_culling_builder) in queue.drain(..) { let instance_data_index = mesh_instance_builder.add_to( entity, - render_mesh_instances, + &mut *render_mesh_instances, current_input_buffer, + &mesh_allocator, ); - let culling_data_index = mesh_culling_builder.add_to(mesh_culling_data_buffer); + let culling_data_index = + mesh_culling_builder.add_to(&mut mesh_culling_data_buffer); debug_assert_eq!(instance_data_index, culling_data_index); } } @@ -1220,7 +1272,7 @@ impl GetBatchData for MeshPipeline { type BufferData = MeshUniform; fn get_batch_data( - (mesh_instances, lightmaps, _, _): &SystemParamItem, + (mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem, entity: Entity, ) -> Option<(Self::BufferData, Option)> { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1231,11 +1283,17 @@ impl GetBatchData for MeshPipeline { return None; }; let mesh_instance = mesh_instances.get(&entity)?; + let first_vertex_index = + match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) { + Some(mesh_vertex_slice) => mesh_vertex_slice.range.start, + None => 0, + }; let maybe_lightmap = lightmaps.render_lightmaps.get(&entity); Some(( MeshUniform::new( &mesh_instance.transforms, + first_vertex_index, maybe_lightmap.map(|lightmap| lightmap.uv_rect), ), mesh_instance.should_batch().then_some(( @@ -1277,7 +1335,7 @@ impl GetFullBatchData for MeshPipeline { } fn get_binned_batch_data( - (mesh_instances, lightmaps, _, _): &SystemParamItem, + (mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem, entity: Entity, ) -> Option { let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else { @@ -1287,10 +1345,16 @@ impl GetFullBatchData for MeshPipeline { return None; }; let mesh_instance = mesh_instances.get(&entity)?; + let first_vertex_index = + match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) { + Some(mesh_vertex_slice) => mesh_vertex_slice.range.start, + None => 0, + }; let maybe_lightmap = lightmaps.render_lightmaps.get(&entity); Some(MeshUniform::new( &mesh_instance.transforms, + first_vertex_index, maybe_lightmap.map(|lightmap| lightmap.uv_rect), )) } @@ -2354,7 +2418,7 @@ impl RenderCommand

for DrawMesh { } RenderMeshBufferInfo::NonIndexed => match indirect_parameters { None => { - pass.draw(0..gpu_mesh.vertex_count, batch_range.clone()); + pass.draw(vertex_buffer_slice.range, batch_range.clone()); } Some((indirect_parameters_offset, indirect_parameters_buffer)) => { pass.draw_indirect(indirect_parameters_buffer, indirect_parameters_offset); diff --git a/crates/bevy_pbr/src/render/mesh.wgsl b/crates/bevy_pbr/src/render/mesh.wgsl index f4386d97ed..7d617755ad 100644 --- a/crates/bevy_pbr/src/render/mesh.wgsl +++ b/crates/bevy_pbr/src/render/mesh.wgsl @@ -1,4 +1,5 @@ #import bevy_pbr::{ + mesh_bindings::mesh, mesh_functions, skinning, morph::morph, @@ -9,18 +10,21 @@ #ifdef MORPH_TARGETS fn morph_vertex(vertex_in: Vertex) -> Vertex { var vertex = vertex_in; + let first_vertex = mesh[vertex.instance_index].first_vertex_index; + let vertex_index = vertex.index - first_vertex; + let weight_count = bevy_pbr::morph::layer_count(); for (var i: u32 = 0u; i < weight_count; i ++) { let weight = bevy_pbr::morph::weight_at(i); if weight == 0.0 { continue; } - vertex.position += weight * morph(vertex.index, bevy_pbr::morph::position_offset, i); + vertex.position += weight * morph(vertex_index, bevy_pbr::morph::position_offset, i); #ifdef VERTEX_NORMALS - vertex.normal += weight * morph(vertex.index, bevy_pbr::morph::normal_offset, i); + vertex.normal += weight * morph(vertex_index, bevy_pbr::morph::normal_offset, i); #endif #ifdef VERTEX_TANGENTS - vertex.tangent += vec4(weight * morph(vertex.index, bevy_pbr::morph::tangent_offset, i), 0.0); + vertex.tangent += vec4(weight * morph(vertex_index, bevy_pbr::morph::tangent_offset, i), 0.0); #endif } return vertex; diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 02d8042d5f..6a5a1fcf06 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -22,6 +22,10 @@ struct MeshInput { // The index of this mesh's `MeshInput` in the `previous_input` array, if // applicable. If not present, this is `u32::MAX`. previous_input_index: u32, + first_vertex_index: u32, + pad_a: u32, + pad_b: u32, + pad_c: u32, } // Information about each mesh instance needed to cull it on GPU. @@ -186,4 +190,5 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].local_from_world_transpose_b = local_from_world_transpose_b; output[mesh_output_index].flags = current_input[input_index].flags; output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; + output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; } diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index 7b45bac0ca..57576a3bb3 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -15,6 +15,11 @@ struct Mesh { // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. flags: u32, lightmap_uv_rect: vec2, + // The index of the mesh's first vertex in the vertex buffer. + first_vertex_index: u32, + pad_a: u32, + pad_b: u32, + pad_c: u32, }; #ifdef SKINNED diff --git a/crates/bevy_pbr/src/volumetric_fog/render.rs b/crates/bevy_pbr/src/volumetric_fog/render.rs index c1420f32a2..de158762d5 100644 --- a/crates/bevy_pbr/src/volumetric_fog/render.rs +++ b/crates/bevy_pbr/src/volumetric_fog/render.rs @@ -472,10 +472,14 @@ impl ViewNode for VolumetricFogNode { render_pass .set_index_buffer(*index_buffer_slice.buffer.slice(..), *index_format); - render_pass.draw_indexed(0..*count, 0, 0..1); + render_pass.draw_indexed( + index_buffer_slice.range.start..(index_buffer_slice.range.start + count), + vertex_buffer_slice.range.start as i32, + 0..1, + ); } RenderMeshBufferInfo::NonIndexed => { - render_pass.draw(0..render_mesh.vertex_count, 0..1); + render_pass.draw(vertex_buffer_slice.range, 0..1); } } }