diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index d5b601d651..05bb1d7acc 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -1,7 +1,7 @@ #[cfg(feature = "meshlet")] use crate::meshlet::{ prepare_material_meshlet_meshes_main_opaque_pass, queue_material_meshlet_meshes, - MeshletGpuScene, + InstanceManager, }; use crate::*; use bevy_asset::{Asset, AssetId, AssetServer}; @@ -283,7 +283,7 @@ where Render, queue_material_meshlet_meshes:: .in_set(RenderSet::QueueMeshes) - .run_if(resource_exists::), + .run_if(resource_exists::), ); #[cfg(feature = "meshlet")] @@ -293,7 +293,7 @@ where .in_set(RenderSet::QueueMeshes) .after(prepare_assets::>) .before(queue_material_meshlet_meshes::) - .run_if(resource_exists::), + .run_if(resource_exists::), ); } diff --git a/crates/bevy_pbr/src/meshlet/asset.rs b/crates/bevy_pbr/src/meshlet/asset.rs index 5701e0f288..108cf98151 100644 --- a/crates/bevy_pbr/src/meshlet/asset.rs +++ b/crates/bevy_pbr/src/meshlet/asset.rs @@ -35,8 +35,6 @@ pub const MESHLET_MESH_ASSET_VERSION: u64 = 1; /// See also [`super::MaterialMeshletMeshBundle`] and [`super::MeshletPlugin`]. #[derive(Asset, TypePath, Clone)] pub struct MeshletMesh { - /// The total amount of triangles summed across all LOD 0 meshlets in the mesh. - pub(crate) worst_case_meshlet_triangles: u64, /// Raw vertex data bytes for the overall mesh. pub(crate) vertex_data: Arc<[u8]>, /// Indices into `vertex_data`. @@ -57,6 +55,8 @@ pub struct Meshlet { pub start_vertex_id: u32, /// The offset within the parent mesh's [`MeshletMesh::indices`] buffer where the indices for this meshlet begin. pub start_index_id: u32, + /// The amount of vertices in this meshlet. + pub vertex_count: u32, /// The amount of triangles in this meshlet. pub triangle_count: u32, } @@ -107,9 +107,6 @@ impl AssetSaver for MeshletMeshSaverLoader { .await?; // Compress and write asset data - writer - .write_all(&asset.worst_case_meshlet_triangles.to_le_bytes()) - .await?; let mut writer = FrameEncoder::new(AsyncWriteSyncAdapter(writer)); write_slice(&asset.vertex_data, &mut writer)?; write_slice(&asset.vertex_ids, &mut writer)?; @@ -146,7 +143,6 @@ impl AssetLoader for MeshletMeshSaverLoader { } // Load and decompress asset data - let worst_case_meshlet_triangles = async_read_u64(reader).await?; let reader = &mut FrameDecoder::new(AsyncReadSyncAdapter(reader)); let vertex_data = read_slice(reader)?; let vertex_ids = read_slice(reader)?; @@ -155,7 +151,6 @@ impl AssetLoader for MeshletMeshSaverLoader { let bounding_spheres = read_slice(reader)?; Ok(MeshletMesh { - worst_case_meshlet_triangles, vertex_data, vertex_ids, indices, diff --git a/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl b/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl deleted file mode 100644 index 177cbc35a3..0000000000 --- a/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl +++ /dev/null @@ -1,10 +0,0 @@ -#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput - -@group(0) @binding(0) var material_depth: texture_2d; - -/// This pass copies the R16Uint material depth texture to an actual Depth16Unorm depth texture. - -@fragment -fn copy_material_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { - return f32(textureLoad(material_depth, vec2(in.position.xy), 0).r) / 65535.0; -} diff --git a/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl b/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl index b73792aa5d..fe5df60f12 100644 --- a/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl +++ b/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl @@ -9,9 +9,10 @@ previous_view, should_cull_instance, cluster_is_second_pass_candidate, - meshlets, - draw_indirect_args, - draw_triangle_buffer, + meshlet_software_raster_indirect_args, + meshlet_hardware_raster_indirect_args, + meshlet_raster_clusters, + meshlet_raster_cluster_rightmost_slot, } #import bevy_render::maths::affine3_to_square @@ -25,10 +26,10 @@ fn cull_clusters( @builtin(workgroup_id) workgroup_id: vec3, @builtin(num_workgroups) num_workgroups: vec3, - @builtin(local_invocation_id) local_invocation_id: vec3, + @builtin(local_invocation_index) local_invocation_index: u32, ) { // Calculate the cluster ID for this thread - let cluster_id = local_invocation_id.x + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); + let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); if cluster_id >= arrayLength(&meshlet_cluster_meshlet_ids) { return; } #ifdef MESHLET_SECOND_CULLING_PASS @@ -47,8 +48,8 @@ fn cull_clusters( let world_from_local = affine3_to_square(instance_uniform.world_from_local); let world_scale = max(length(world_from_local[0]), max(length(world_from_local[1]), length(world_from_local[2]))); let bounding_spheres = meshlet_bounding_spheres[meshlet_id]; - var culling_bounding_sphere_center = world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); - var culling_bounding_sphere_radius = world_scale * bounding_spheres.self_culling.radius; + let culling_bounding_sphere_center = world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); + let culling_bounding_sphere_radius = world_scale * bounding_spheres.self_culling.radius; #ifdef MESHLET_FIRST_CULLING_PASS // Frustum culling @@ -59,17 +60,17 @@ fn cull_clusters( } } - // Calculate view-space LOD bounding sphere for the meshlet + // Calculate view-space LOD bounding sphere for the cluster let lod_bounding_sphere_center = world_from_local * vec4(bounding_spheres.self_lod.center, 1.0); let lod_bounding_sphere_radius = world_scale * bounding_spheres.self_lod.radius; let lod_bounding_sphere_center_view_space = (view.view_from_world * vec4(lod_bounding_sphere_center.xyz, 1.0)).xyz; - // Calculate view-space LOD bounding sphere for the meshlet's parent + // Calculate view-space LOD bounding sphere for the cluster's parent let parent_lod_bounding_sphere_center = world_from_local * vec4(bounding_spheres.parent_lod.center, 1.0); let parent_lod_bounding_sphere_radius = world_scale * bounding_spheres.parent_lod.radius; let parent_lod_bounding_sphere_center_view_space = (view.view_from_world * vec4(parent_lod_bounding_sphere_center.xyz, 1.0)).xyz; - // Check LOD cut (meshlet error imperceptible, and parent error not imperceptible) + // Check LOD cut (cluster error imperceptible, and parent error not imperceptible) let lod_is_ok = lod_error_is_imperceptible(lod_bounding_sphere_center_view_space, lod_bounding_sphere_radius); let parent_lod_is_ok = lod_error_is_imperceptible(parent_lod_bounding_sphere_center_view_space, parent_lod_bounding_sphere_radius); if !lod_is_ok || parent_lod_is_ok { return; } @@ -79,16 +80,20 @@ fn cull_clusters( #ifdef MESHLET_FIRST_CULLING_PASS let previous_world_from_local = affine3_to_square(instance_uniform.previous_world_from_local); let previous_world_from_local_scale = max(length(previous_world_from_local[0]), max(length(previous_world_from_local[1]), length(previous_world_from_local[2]))); - culling_bounding_sphere_center = previous_world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); - culling_bounding_sphere_radius = previous_world_from_local_scale * bounding_spheres.self_culling.radius; + let occlusion_culling_bounding_sphere_center = previous_world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); + let occlusion_culling_bounding_sphere_radius = previous_world_from_local_scale * bounding_spheres.self_culling.radius; + let occlusion_culling_bounding_sphere_center_view_space = (previous_view.view_from_world * vec4(occlusion_culling_bounding_sphere_center.xyz, 1.0)).xyz; +#else + let occlusion_culling_bounding_sphere_center = culling_bounding_sphere_center; + let occlusion_culling_bounding_sphere_radius = culling_bounding_sphere_radius; + let occlusion_culling_bounding_sphere_center_view_space = (view.view_from_world * vec4(occlusion_culling_bounding_sphere_center.xyz, 1.0)).xyz; #endif - let culling_bounding_sphere_center_view_space = (view.view_from_world * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; - let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); + var aabb = project_view_space_sphere_to_screen_space_aabb(occlusion_culling_bounding_sphere_center_view_space, occlusion_culling_bounding_sphere_radius); let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)); - let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; - let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; - let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32 + var aabb_width_pixels = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; + var aabb_height_pixels = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; + let depth_level = max(0, i32(ceil(log2(max(aabb_width_pixels, aabb_height_pixels))))); // TODO: Naga doesn't like this being a u32 let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); @@ -102,11 +107,11 @@ fn cull_clusters( var cluster_visible: bool; if view.clip_from_view[3][3] == 1.0 { // Orthographic - let sphere_depth = view.clip_from_view[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.clip_from_view[2][2]; + let sphere_depth = view.clip_from_view[3][2] + (occlusion_culling_bounding_sphere_center_view_space.z + occlusion_culling_bounding_sphere_radius) * view.clip_from_view[2][2]; cluster_visible = sphere_depth >= occluder_depth; } else { // Perspective - let sphere_depth = -view.clip_from_view[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius); + let sphere_depth = -view.clip_from_view[3][2] / (occlusion_culling_bounding_sphere_center_view_space.z + occlusion_culling_bounding_sphere_radius); cluster_visible = sphere_depth >= occluder_depth; } @@ -118,15 +123,29 @@ fn cull_clusters( } #endif - // Append a list of this cluster's triangles to draw if not culled - if cluster_visible { - let meshlet_triangle_count = meshlets[meshlet_id].triangle_count; - let buffer_start = atomicAdd(&draw_indirect_args.vertex_count, meshlet_triangle_count * 3u) / 3u; - let cluster_id_packed = cluster_id << 6u; - for (var triangle_id = 0u; triangle_id < meshlet_triangle_count; triangle_id++) { - draw_triangle_buffer[buffer_start + triangle_id] = cluster_id_packed | triangle_id; - } + // Cluster would be occluded if drawn, so don't setup a draw for it + if !cluster_visible { return; } + + // Check how big the cluster is in screen space +#ifdef MESHLET_FIRST_CULLING_PASS + let culling_bounding_sphere_center_view_space = (view.view_from_world * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; + aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); + aabb_width_pixels = (aabb.z - aabb.x) * view.viewport.z; + aabb_height_pixels = (aabb.w - aabb.y) * view.viewport.w; +#endif + let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(32.0)); // TODO: Nanite does something different. Come up with my own heuristic. + + // TODO: Also check if needs depth clipping + var buffer_slot: u32; + if cluster_is_small { + // Append this cluster to the list for software rasterization + buffer_slot = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u); + } else { + // Append this cluster to the list for hardware rasterization + buffer_slot = atomicAdd(&meshlet_hardware_raster_indirect_args.instance_count, 1u); + buffer_slot = meshlet_raster_cluster_rightmost_slot - buffer_slot; } + meshlet_raster_clusters[buffer_slot] = cluster_id; } // https://stackoverflow.com/questions/21648630/radius-of-projected-sphere-in-screen-space/21649403#21649403 diff --git a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl index 04e8f3f56a..80dd7d4baa 100644 --- a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl +++ b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl @@ -1,4 +1,8 @@ -@group(0) @binding(0) var mip_0: texture_depth_2d; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(0) var mip_0: array; // Per pixel +#else +@group(0) @binding(0) var mip_0: array; // Per pixel +#endif @group(0) @binding(1) var mip_1: texture_storage_2d; @group(0) @binding(2) var mip_2: texture_storage_2d; @group(0) @binding(3) var mip_3: texture_storage_2d; @@ -12,11 +16,16 @@ @group(0) @binding(11) var mip_11: texture_storage_2d; @group(0) @binding(12) var mip_12: texture_storage_2d; @group(0) @binding(13) var samplr: sampler; -var max_mip_level: u32; +struct Constants { max_mip_level: u32, view_width: u32 } +var constants: Constants; /// Generates a hierarchical depth buffer. /// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528 +// TODO: +// * Subgroup support +// * True single pass downsampling + var intermediate_memory: array, 16>; @compute @@ -70,7 +79,7 @@ fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation v[3] = reduce_load_mip_0(tex); textureStore(mip_1, pix, vec4(v[3])); - if max_mip_level <= 1u { return; } + if constants.max_mip_level <= 1u { return; } for (var i = 0u; i < 4u; i++) { intermediate_memory[x][y] = v[i]; @@ -100,19 +109,19 @@ fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation } fn downsample_mips_2_to_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { - if max_mip_level <= 2u { return; } + if constants.max_mip_level <= 2u { return; } workgroupBarrier(); downsample_mip_2(x, y, workgroup_id, local_invocation_index); - if max_mip_level <= 3u { return; } + if constants.max_mip_level <= 3u { return; } workgroupBarrier(); downsample_mip_3(x, y, workgroup_id, local_invocation_index); - if max_mip_level <= 4u { return; } + if constants.max_mip_level <= 4u { return; } workgroupBarrier(); downsample_mip_4(x, y, workgroup_id, local_invocation_index); - if max_mip_level <= 5u { return; } + if constants.max_mip_level <= 5u { return; } workgroupBarrier(); downsample_mip_5(workgroup_id, local_invocation_index); } @@ -191,7 +200,7 @@ fn downsample_mips_6_and_7(x: u32, y: u32) { v[3] = reduce_load_mip_6(tex); textureStore(mip_7, pix, vec4(v[3])); - if max_mip_level <= 7u { return; } + if constants.max_mip_level <= 7u { return; } let vr = reduce_4(v); textureStore(mip_8, vec2(x, y), vec4(vr)); @@ -199,19 +208,19 @@ fn downsample_mips_6_and_7(x: u32, y: u32) { } fn downsample_mips_8_to_11(x: u32, y: u32, local_invocation_index: u32) { - if max_mip_level <= 8u { return; } + if constants.max_mip_level <= 8u { return; } workgroupBarrier(); downsample_mip_8(x, y, local_invocation_index); - if max_mip_level <= 9u { return; } + if constants.max_mip_level <= 9u { return; } workgroupBarrier(); downsample_mip_9(x, y, local_invocation_index); - if max_mip_level <= 10u { return; } + if constants.max_mip_level <= 10u { return; } workgroupBarrier(); downsample_mip_10(x, y, local_invocation_index); - if max_mip_level <= 11u { return; } + if constants.max_mip_level <= 11u { return; } workgroupBarrier(); downsample_mip_11(local_invocation_index); } @@ -275,8 +284,11 @@ fn remap_for_wave_reduction(a: u32) -> vec2u { } fn reduce_load_mip_0(tex: vec2u) -> f32 { - let uv = (vec2f(tex) + 0.5) / vec2f(textureDimensions(mip_0)); - return reduce_4(textureGather(mip_0, samplr, uv)); + let a = load_mip_0(tex.x, tex.y); + let b = load_mip_0(tex.x + 1u, tex.y); + let c = load_mip_0(tex.x, tex.y + 1u); + let d = load_mip_0(tex.x + 1u, tex.y + 1u); + return reduce_4(vec4(a, b, c, d)); } fn reduce_load_mip_6(tex: vec2u) -> f32 { @@ -288,6 +300,15 @@ fn reduce_load_mip_6(tex: vec2u) -> f32 { )); } +fn load_mip_0(x: u32, y: u32) -> f32 { + let i = y * constants.view_width + x; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + return bitcast(u32(mip_0[i] >> 32u)); +#else + return bitcast(mip_0[i]); +#endif +} + fn reduce_4(v: vec4f) -> f32 { return min(min(v.x, v.y), min(v.z, v.w)); } diff --git a/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl b/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl index f228ba0508..04af6c4ad7 100644 --- a/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl +++ b/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl @@ -13,11 +13,11 @@ fn fill_cluster_buffers( @builtin(workgroup_id) workgroup_id: vec3, @builtin(num_workgroups) num_workgroups: vec3, - @builtin(local_invocation_id) local_invocation_id: vec3 + @builtin(local_invocation_index) local_invocation_index: u32, ) { // Calculate the cluster ID for this thread - let cluster_id = local_invocation_id.x + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); - if cluster_id >= cluster_count { return; } + let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); + if cluster_id >= cluster_count { return; } // TODO: Could be an arrayLength? // Binary search to find the instance this cluster belongs to var left = 0u; diff --git a/crates/bevy_pbr/src/meshlet/from_mesh.rs b/crates/bevy_pbr/src/meshlet/from_mesh.rs index f62e00b434..047b1a492e 100644 --- a/crates/bevy_pbr/src/meshlet/from_mesh.rs +++ b/crates/bevy_pbr/src/meshlet/from_mesh.rs @@ -49,11 +49,6 @@ impl MeshletMesh { }, }) .collect::>(); - let worst_case_meshlet_triangles = meshlets - .meshlets - .iter() - .map(|m| m.triangle_count as u64) - .sum(); let mesh_scale = simplify_scale(&vertices); // Build further LODs @@ -87,7 +82,7 @@ impl MeshletMesh { // Add the maximum child error to the parent error to make parent error cumulative from LOD 0 // (we're currently building the parent from its children) - group_error += group_meshlets.iter().fold(group_error, |acc, meshlet_id| { + group_error += group_meshlets.iter().fold(0.0f32, |acc, meshlet_id| { acc.max(bounding_spheres[*meshlet_id].self_lod.radius) }); @@ -140,12 +135,12 @@ impl MeshletMesh { .map(|m| Meshlet { start_vertex_id: m.vertex_offset, start_index_id: m.triangle_offset, + vertex_count: m.vertex_count, triangle_count: m.triangle_count, }) .collect(); Ok(Self { - worst_case_meshlet_triangles, vertex_data: vertex_buffer.into(), vertex_ids: meshlets.vertices.into(), indices: meshlets.triangles.into(), @@ -294,6 +289,7 @@ fn simplify_meshlet_groups( let target_error = target_error_relative * mesh_scale; // Simplify the group to ~50% triangle count + // TODO: Simplify using vertex attributes let mut error = 0.0; let simplified_group_indices = simplify( &group_indices, diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs deleted file mode 100644 index 1d4bf7ffe6..0000000000 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ /dev/null @@ -1,1050 +0,0 @@ -use super::{ - asset::{Meshlet, MeshletBoundingSpheres, MeshletMesh}, - persistent_buffer::PersistentGpuBuffer, -}; -use crate::{ - Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver, - PreviousGlobalTransform, RenderMaterialInstances, ShadowView, -}; -use bevy_asset::{AssetEvent, AssetId, AssetServer, Assets, Handle, UntypedAssetId}; -use bevy_core_pipeline::{ - core_3d::Camera3d, - prepass::{PreviousViewData, PreviousViewUniforms}, -}; -use bevy_ecs::{ - component::Component, - entity::{Entity, EntityHashMap}, - event::EventReader, - query::{AnyOf, Has}, - system::{Commands, Local, Query, Res, ResMut, Resource, SystemState}, - world::{FromWorld, World}, -}; -use bevy_math::{UVec2, Vec4Swizzles}; -use bevy_render::{ - render_resource::{binding_types::*, *}, - renderer::{RenderDevice, RenderQueue}, - texture::{CachedTexture, TextureCache}, - view::{ExtractedView, RenderLayers, ViewDepthTexture, ViewUniform, ViewUniforms}, - MainWorld, -}; -use bevy_transform::components::GlobalTransform; -use bevy_utils::{default, HashMap, HashSet}; -use encase::internal::WriteInto; -use std::{ - array, iter, - mem::size_of, - ops::{DerefMut, Range}, - sync::{atomic::AtomicBool, Arc}, -}; - -/// Create and queue for uploading to the GPU [`MeshUniform`] components for -/// [`MeshletMesh`] entities, as well as queuing uploads for any new meshlet mesh -/// assets that have not already been uploaded to the GPU. -pub fn extract_meshlet_meshes( - mut gpu_scene: ResMut, - // TODO: Replace main_world and system_state when Extract>> is possible - mut main_world: ResMut, - mut system_state: Local< - Option< - SystemState<( - Query<( - Entity, - &Handle, - &GlobalTransform, - Option<&PreviousGlobalTransform>, - Option<&RenderLayers>, - Has, - Has, - )>, - Res, - ResMut>, - EventReader>, - )>, - >, - >, -) { - if system_state.is_none() { - *system_state = Some(SystemState::new(&mut main_world)); - } - let system_state = system_state.as_mut().unwrap(); - - let (instances_query, asset_server, mut assets, mut asset_events) = - system_state.get_mut(&mut main_world); - - // Reset all temporary data for MeshletGpuScene - gpu_scene.reset(); - - // Free GPU buffer space for any modified or dropped MeshletMesh assets - for asset_event in asset_events.read() { - if let AssetEvent::Unused { id } | AssetEvent::Modified { id } = asset_event { - if let Some(( - [vertex_data_slice, vertex_ids_slice, indices_slice, meshlets_slice, meshlet_bounding_spheres_slice], - _, - )) = gpu_scene.meshlet_mesh_slices.remove(id) - { - gpu_scene.vertex_data.mark_slice_unused(vertex_data_slice); - gpu_scene.vertex_ids.mark_slice_unused(vertex_ids_slice); - gpu_scene.indices.mark_slice_unused(indices_slice); - gpu_scene.meshlets.mark_slice_unused(meshlets_slice); - gpu_scene - .meshlet_bounding_spheres - .mark_slice_unused(meshlet_bounding_spheres_slice); - } - } - } - - for ( - instance, - handle, - transform, - previous_transform, - render_layers, - not_shadow_receiver, - not_shadow_caster, - ) in &instances_query - { - // Skip instances with an unloaded MeshletMesh asset - if asset_server.is_managed(handle.id()) - && !asset_server.is_loaded_with_dependencies(handle.id()) - { - continue; - } - - // Upload the instance's MeshletMesh asset data, if not done already, along with other per-frame per-instance data. - gpu_scene.queue_meshlet_mesh_upload( - instance, - render_layers.cloned().unwrap_or(default()), - not_shadow_caster, - handle, - &mut assets, - ); - - // Build a MeshUniform for each instance - let transform = transform.affine(); - let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform); - let mut flags = if not_shadow_receiver { - MeshFlags::empty() - } else { - MeshFlags::SHADOW_RECEIVER - }; - if transform.matrix3.determinant().is_sign_positive() { - flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; - } - let transforms = MeshTransforms { - world_from_local: (&transform).into(), - previous_world_from_local: (&previous_transform).into(), - flags: flags.bits(), - }; - gpu_scene - .instance_uniforms - .get_mut() - .push(MeshUniform::new(&transforms, 0, None)); - } -} - -/// Upload all newly queued [`MeshletMesh`] asset data from [`extract_meshlet_meshes`] to the GPU. -pub fn perform_pending_meshlet_mesh_writes( - mut gpu_scene: ResMut, - render_queue: Res, - render_device: Res, -) { - gpu_scene - .vertex_data - .perform_writes(&render_queue, &render_device); - gpu_scene - .vertex_ids - .perform_writes(&render_queue, &render_device); - gpu_scene - .indices - .perform_writes(&render_queue, &render_device); - gpu_scene - .meshlets - .perform_writes(&render_queue, &render_device); - gpu_scene - .meshlet_bounding_spheres - .perform_writes(&render_queue, &render_device); -} - -/// For each entity in the scene, record what material ID (for use with depth testing during the meshlet mesh material draw nodes) -/// its material was assigned in the `prepare_material_meshlet_meshes` systems, and note that the material is used by at least one entity in the scene. -pub fn queue_material_meshlet_meshes( - mut gpu_scene: ResMut, - render_material_instances: Res>, -) { - // TODO: Ideally we could parallelize this system, both between different materials, and the loop over instances - let gpu_scene = gpu_scene.deref_mut(); - - for (i, (instance, _, _)) in gpu_scene.instances.iter().enumerate() { - if let Some(material_asset_id) = render_material_instances.get(instance) { - let material_asset_id = material_asset_id.untyped(); - if let Some(material_id) = gpu_scene.material_id_lookup.get(&material_asset_id) { - gpu_scene.material_ids_present_in_scene.insert(*material_id); - gpu_scene.instance_material_ids.get_mut()[i] = *material_id; - } - } - } -} - -// TODO: Try using Queue::write_buffer_with() in queue_meshlet_mesh_upload() to reduce copies -fn upload_storage_buffer( - buffer: &mut StorageBuffer>, - render_device: &RenderDevice, - render_queue: &RenderQueue, -) where - Vec: WriteInto, -{ - let inner = buffer.buffer(); - let capacity = inner.map_or(0, |b| b.size()); - let size = buffer.get().size().get() as BufferAddress; - - if capacity >= size { - let inner = inner.unwrap(); - let bytes = bytemuck::must_cast_slice(buffer.get().as_slice()); - render_queue.write_buffer(inner, 0, bytes); - } else { - buffer.write_buffer(render_device, render_queue); - } -} - -pub fn prepare_meshlet_per_frame_resources( - mut gpu_scene: ResMut, - views: Query<( - Entity, - &ExtractedView, - Option<&RenderLayers>, - AnyOf<(&Camera3d, &ShadowView)>, - )>, - mut texture_cache: ResMut, - render_queue: Res, - render_device: Res, - mut commands: Commands, -) { - if gpu_scene.scene_meshlet_count == 0 { - return; - } - - let gpu_scene = gpu_scene.as_mut(); - - gpu_scene - .instance_uniforms - .write_buffer(&render_device, &render_queue); - upload_storage_buffer( - &mut gpu_scene.instance_material_ids, - &render_device, - &render_queue, - ); - upload_storage_buffer( - &mut gpu_scene.instance_meshlet_counts_prefix_sum, - &render_device, - &render_queue, - ); - upload_storage_buffer( - &mut gpu_scene.instance_meshlet_slice_starts, - &render_device, - &render_queue, - ); - - // Early submission for GPU data uploads to start while the render graph records commands - render_queue.submit([]); - - let needed_buffer_size = 4 * gpu_scene.scene_meshlet_count as u64; - match &mut gpu_scene.cluster_instance_ids { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_cluster_instance_ids"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - match &mut gpu_scene.cluster_meshlet_ids { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_cluster_meshlet_ids"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - - let needed_buffer_size = 4 * gpu_scene.scene_triangle_count; - let visibility_buffer_draw_triangle_buffer = - match &mut gpu_scene.visibility_buffer_draw_triangle_buffer { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_visibility_buffer_draw_triangle_buffer"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - - let needed_buffer_size = - gpu_scene.scene_meshlet_count.div_ceil(u32::BITS) as u64 * size_of::() as u64; - for (view_entity, view, render_layers, (_, shadow_view)) in &views { - let instance_visibility = gpu_scene - .view_instance_visibility - .entry(view_entity) - .or_insert_with(|| { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_view_instance_visibility")); - buffer - }); - for (instance_index, (_, layers, not_shadow_caster)) in - gpu_scene.instances.iter().enumerate() - { - // If either the layers don't match the view's layers or this is a shadow view - // and the instance is not a shadow caster, hide the instance for this view - if !render_layers.unwrap_or(&default()).intersects(layers) - || (shadow_view.is_some() && *not_shadow_caster) - { - let vec = instance_visibility.get_mut(); - let index = instance_index / 32; - let bit = instance_index - index * 32; - if vec.len() <= index { - vec.extend(iter::repeat(0).take(index - vec.len() + 1)); - } - vec[index] |= 1 << bit; - } - } - upload_storage_buffer(instance_visibility, &render_device, &render_queue); - let instance_visibility = instance_visibility.buffer().unwrap().clone(); - - let second_pass_candidates_buffer = match &mut gpu_scene.second_pass_candidates_buffer { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_second_pass_candidates"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - - let visibility_buffer = TextureDescriptor { - label: Some("meshlet_visibility_buffer"), - size: Extent3d { - width: view.viewport.z, - height: view.viewport.w, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Uint, - usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, - view_formats: &[], - }; - - let visibility_buffer_draw_indirect_args_first = - render_device.create_buffer_with_data(&BufferInitDescriptor { - label: Some("meshlet_visibility_buffer_draw_indirect_args_first"), - contents: DrawIndirectArgs { - vertex_count: 0, - instance_count: 1, - first_vertex: 0, - first_instance: 0, - } - .as_bytes(), - usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, - }); - let visibility_buffer_draw_indirect_args_second = - render_device.create_buffer_with_data(&BufferInitDescriptor { - label: Some("meshlet_visibility_buffer_draw_indirect_args_second"), - contents: DrawIndirectArgs { - vertex_count: 0, - instance_count: 1, - first_vertex: 0, - first_instance: 0, - } - .as_bytes(), - usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, - }); - - let depth_pyramid_size = Extent3d { - width: view.viewport.z.div_ceil(2), - height: view.viewport.w.div_ceil(2), - depth_or_array_layers: 1, - }; - let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); - let depth_pyramid = texture_cache.get( - &render_device, - TextureDescriptor { - label: Some("meshlet_depth_pyramid"), - size: depth_pyramid_size, - mip_level_count: depth_pyramid_mip_count, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Float, - usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, - view_formats: &[], - }, - ); - let depth_pyramid_mips = array::from_fn(|i| { - if (i as u32) < depth_pyramid_mip_count { - depth_pyramid.texture.create_view(&TextureViewDescriptor { - label: Some("meshlet_depth_pyramid_texture_view"), - format: Some(TextureFormat::R32Float), - dimension: Some(TextureViewDimension::D2), - aspect: TextureAspect::All, - base_mip_level: i as u32, - mip_level_count: Some(1), - base_array_layer: 0, - array_layer_count: Some(1), - }) - } else { - gpu_scene.depth_pyramid_dummy_texture.clone() - } - }); - let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); - - let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) { - Some(texture_view) => texture_view.clone(), - None => depth_pyramid_all_mips.clone(), - }; - gpu_scene - .previous_depth_pyramids - .insert(view_entity, depth_pyramid_all_mips.clone()); - - let material_depth_color = TextureDescriptor { - label: Some("meshlet_material_depth_color"), - size: Extent3d { - width: view.viewport.z, - height: view.viewport.w, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R16Uint, - usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, - view_formats: &[], - }; - - let material_depth = TextureDescriptor { - label: Some("meshlet_material_depth"), - size: Extent3d { - width: view.viewport.z, - height: view.viewport.w, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::Depth16Unorm, - usage: TextureUsages::RENDER_ATTACHMENT, - view_formats: &[], - }; - - let not_shadow_view = shadow_view.is_none(); - commands.entity(view_entity).insert(MeshletViewResources { - scene_meshlet_count: gpu_scene.scene_meshlet_count, - second_pass_candidates_buffer, - instance_visibility, - visibility_buffer: not_shadow_view - .then(|| texture_cache.get(&render_device, visibility_buffer)), - visibility_buffer_draw_indirect_args_first, - visibility_buffer_draw_indirect_args_second, - visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(), - depth_pyramid_all_mips, - depth_pyramid_mips, - depth_pyramid_mip_count, - previous_depth_pyramid, - material_depth_color: not_shadow_view - .then(|| texture_cache.get(&render_device, material_depth_color)), - material_depth: not_shadow_view - .then(|| texture_cache.get(&render_device, material_depth)), - view_size: view.viewport.zw(), - }); - } -} - -pub fn prepare_meshlet_view_bind_groups( - gpu_scene: Res, - views: Query<( - Entity, - &MeshletViewResources, - AnyOf<(&ViewDepthTexture, &ShadowView)>, - )>, - view_uniforms: Res, - previous_view_uniforms: Res, - render_device: Res, - mut commands: Commands, -) { - let ( - Some(cluster_instance_ids), - Some(cluster_meshlet_ids), - Some(view_uniforms), - Some(previous_view_uniforms), - ) = ( - gpu_scene.cluster_instance_ids.as_ref(), - gpu_scene.cluster_meshlet_ids.as_ref(), - view_uniforms.uniforms.binding(), - previous_view_uniforms.uniforms.binding(), - ) - else { - return; - }; - - let first_node = Arc::new(AtomicBool::new(true)); - - // TODO: Some of these bind groups can be reused across multiple views - for (view_entity, view_resources, view_depth) in &views { - let entries = BindGroupEntries::sequential(( - gpu_scene - .instance_meshlet_counts_prefix_sum - .binding() - .unwrap(), - gpu_scene.instance_meshlet_slice_starts.binding().unwrap(), - cluster_instance_ids.as_entire_binding(), - cluster_meshlet_ids.as_entire_binding(), - )); - let fill_cluster_buffers = render_device.create_bind_group( - "meshlet_fill_cluster_buffers", - &gpu_scene.fill_cluster_buffers_bind_group_layout, - &entries, - ); - - let entries = BindGroupEntries::sequential(( - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlet_bounding_spheres.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - view_resources.instance_visibility.as_entire_binding(), - view_resources - .second_pass_candidates_buffer - .as_entire_binding(), - gpu_scene.meshlets.binding(), - view_resources - .visibility_buffer_draw_indirect_args_first - .as_entire_binding(), - view_resources - .visibility_buffer_draw_triangle_buffer - .as_entire_binding(), - &view_resources.previous_depth_pyramid, - view_uniforms.clone(), - previous_view_uniforms.clone(), - )); - let culling_first = render_device.create_bind_group( - "meshlet_culling_first_bind_group", - &gpu_scene.culling_bind_group_layout, - &entries, - ); - - let entries = BindGroupEntries::sequential(( - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlet_bounding_spheres.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - view_resources.instance_visibility.as_entire_binding(), - view_resources - .second_pass_candidates_buffer - .as_entire_binding(), - gpu_scene.meshlets.binding(), - view_resources - .visibility_buffer_draw_indirect_args_second - .as_entire_binding(), - view_resources - .visibility_buffer_draw_triangle_buffer - .as_entire_binding(), - &view_resources.depth_pyramid_all_mips, - view_uniforms.clone(), - previous_view_uniforms.clone(), - )); - let culling_second = render_device.create_bind_group( - "meshlet_culling_second_bind_group", - &gpu_scene.culling_bind_group_layout, - &entries, - ); - - let view_depth_texture = match view_depth { - (Some(view_depth), None) => view_depth.view(), - (None, Some(shadow_view)) => &shadow_view.depth_attachment.view, - _ => unreachable!(), - }; - let downsample_depth = render_device.create_bind_group( - "meshlet_downsample_depth_bind_group", - &gpu_scene.downsample_depth_bind_group_layout, - &BindGroupEntries::sequential(( - view_depth_texture, - &view_resources.depth_pyramid_mips[0], - &view_resources.depth_pyramid_mips[1], - &view_resources.depth_pyramid_mips[2], - &view_resources.depth_pyramid_mips[3], - &view_resources.depth_pyramid_mips[4], - &view_resources.depth_pyramid_mips[5], - &view_resources.depth_pyramid_mips[6], - &view_resources.depth_pyramid_mips[7], - &view_resources.depth_pyramid_mips[8], - &view_resources.depth_pyramid_mips[9], - &view_resources.depth_pyramid_mips[10], - &view_resources.depth_pyramid_mips[11], - &gpu_scene.depth_pyramid_sampler, - )), - ); - - let entries = BindGroupEntries::sequential(( - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlets.binding(), - gpu_scene.indices.binding(), - gpu_scene.vertex_ids.binding(), - gpu_scene.vertex_data.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - gpu_scene.instance_material_ids.binding().unwrap(), - view_resources - .visibility_buffer_draw_triangle_buffer - .as_entire_binding(), - view_uniforms.clone(), - )); - let visibility_buffer_raster = render_device.create_bind_group( - "meshlet_visibility_raster_buffer_bind_group", - &gpu_scene.visibility_buffer_raster_bind_group_layout, - &entries, - ); - - let copy_material_depth = - view_resources - .material_depth_color - .as_ref() - .map(|material_depth_color| { - render_device.create_bind_group( - "meshlet_copy_material_depth_bind_group", - &gpu_scene.copy_material_depth_bind_group_layout, - &[BindGroupEntry { - binding: 0, - resource: BindingResource::TextureView( - &material_depth_color.default_view, - ), - }], - ) - }); - - let material_draw = view_resources - .visibility_buffer - .as_ref() - .map(|visibility_buffer| { - let entries = BindGroupEntries::sequential(( - &visibility_buffer.default_view, - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlets.binding(), - gpu_scene.indices.binding(), - gpu_scene.vertex_ids.binding(), - gpu_scene.vertex_data.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - )); - render_device.create_bind_group( - "meshlet_mesh_material_draw_bind_group", - &gpu_scene.material_draw_bind_group_layout, - &entries, - ) - }); - - commands.entity(view_entity).insert(MeshletViewBindGroups { - first_node: Arc::clone(&first_node), - fill_cluster_buffers, - culling_first, - culling_second, - downsample_depth, - visibility_buffer_raster, - copy_material_depth, - material_draw, - }); - } -} - -/// A resource that manages GPU data for rendering [`MeshletMesh`]'s. -#[derive(Resource)] -pub struct MeshletGpuScene { - vertex_data: PersistentGpuBuffer>, - vertex_ids: PersistentGpuBuffer>, - indices: PersistentGpuBuffer>, - meshlets: PersistentGpuBuffer>, - meshlet_bounding_spheres: PersistentGpuBuffer>, - meshlet_mesh_slices: HashMap, ([Range; 5], u64)>, - - scene_meshlet_count: u32, - scene_triangle_count: u64, - next_material_id: u32, - material_id_lookup: HashMap, - material_ids_present_in_scene: HashSet, - /// Per-instance [`Entity`], [`RenderLayers`], and [`NotShadowCaster`] - instances: Vec<(Entity, RenderLayers, bool)>, - /// Per-instance transforms, model matrices, and render flags - instance_uniforms: StorageBuffer>, - /// Per-view per-instance visibility bit. Used for [`RenderLayers`] and [`NotShadowCaster`] support. - view_instance_visibility: EntityHashMap>>, - instance_material_ids: StorageBuffer>, - instance_meshlet_counts_prefix_sum: StorageBuffer>, - instance_meshlet_slice_starts: StorageBuffer>, - cluster_instance_ids: Option, - cluster_meshlet_ids: Option, - second_pass_candidates_buffer: Option, - previous_depth_pyramids: EntityHashMap, - visibility_buffer_draw_triangle_buffer: Option, - - fill_cluster_buffers_bind_group_layout: BindGroupLayout, - culling_bind_group_layout: BindGroupLayout, - visibility_buffer_raster_bind_group_layout: BindGroupLayout, - downsample_depth_bind_group_layout: BindGroupLayout, - copy_material_depth_bind_group_layout: BindGroupLayout, - material_draw_bind_group_layout: BindGroupLayout, - depth_pyramid_sampler: Sampler, - depth_pyramid_dummy_texture: TextureView, -} - -impl FromWorld for MeshletGpuScene { - fn from_world(world: &mut World) -> Self { - let render_device = world.resource::(); - - Self { - vertex_data: PersistentGpuBuffer::new("meshlet_vertex_data", render_device), - vertex_ids: PersistentGpuBuffer::new("meshlet_vertex_ids", render_device), - indices: PersistentGpuBuffer::new("meshlet_indices", render_device), - meshlets: PersistentGpuBuffer::new("meshlets", render_device), - meshlet_bounding_spheres: PersistentGpuBuffer::new( - "meshlet_bounding_spheres", - render_device, - ), - meshlet_mesh_slices: HashMap::new(), - - scene_meshlet_count: 0, - scene_triangle_count: 0, - next_material_id: 0, - material_id_lookup: HashMap::new(), - material_ids_present_in_scene: HashSet::new(), - instances: Vec::new(), - instance_uniforms: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_uniforms")); - buffer - }, - view_instance_visibility: EntityHashMap::default(), - instance_material_ids: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_material_ids")); - buffer - }, - instance_meshlet_counts_prefix_sum: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_meshlet_counts_prefix_sum")); - buffer - }, - instance_meshlet_slice_starts: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_meshlet_slice_starts")); - buffer - }, - cluster_instance_ids: None, - cluster_meshlet_ids: None, - second_pass_candidates_buffer: None, - previous_depth_pyramids: EntityHashMap::default(), - visibility_buffer_draw_triangle_buffer: None, - - // TODO: Buffer min sizes - fill_cluster_buffers_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_fill_cluster_buffers_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::COMPUTE, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_sized(false, None), - ), - ), - ), - culling_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_culling_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::COMPUTE, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_sized(false, None), - texture_2d(TextureSampleType::Float { filterable: false }), - uniform_buffer::(true), - uniform_buffer::(true), - ), - ), - ), - downsample_depth_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_downsample_depth_bind_group_layout", - &BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, { - let write_only_r32float = || { - texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly) - }; - ( - texture_depth_2d(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - texture_storage_2d( - TextureFormat::R32Float, - StorageTextureAccess::ReadWrite, - ), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - sampler(SamplerBindingType::NonFiltering), - ) - }), - ), - visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_visibility_buffer_raster_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::VERTEX, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - uniform_buffer::(true), - ), - ), - ), - copy_material_depth_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_copy_material_depth_bind_group_layout", - &BindGroupLayoutEntries::single( - ShaderStages::FRAGMENT, - texture_2d(TextureSampleType::Uint), - ), - ), - material_draw_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_mesh_material_draw_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::FRAGMENT, - ( - texture_2d(TextureSampleType::Uint), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - ), - ), - ), - depth_pyramid_sampler: render_device.create_sampler(&SamplerDescriptor { - label: Some("meshlet_depth_pyramid_sampler"), - ..default() - }), - depth_pyramid_dummy_texture: render_device - .create_texture(&TextureDescriptor { - label: Some("meshlet_depth_pyramid_dummy_texture"), - size: Extent3d { - width: 1, - height: 1, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Float, - usage: TextureUsages::STORAGE_BINDING, - view_formats: &[], - }) - .create_view(&TextureViewDescriptor { - label: Some("meshlet_depth_pyramid_dummy_texture_view"), - format: Some(TextureFormat::R32Float), - dimension: Some(TextureViewDimension::D2), - aspect: TextureAspect::All, - base_mip_level: 0, - mip_level_count: Some(1), - base_array_layer: 0, - array_layer_count: Some(1), - }), - } - } -} - -impl MeshletGpuScene { - /// Clear per-frame CPU->GPU upload buffers and reset all per-frame data. - fn reset(&mut self) { - // TODO: Shrink capacity if saturation is low - self.scene_meshlet_count = 0; - self.scene_triangle_count = 0; - self.next_material_id = 0; - self.material_id_lookup.clear(); - self.material_ids_present_in_scene.clear(); - self.instances.clear(); - self.view_instance_visibility - .values_mut() - .for_each(|b| b.get_mut().clear()); - self.instance_uniforms.get_mut().clear(); - self.instance_material_ids.get_mut().clear(); - self.instance_meshlet_counts_prefix_sum.get_mut().clear(); - self.instance_meshlet_slice_starts.get_mut().clear(); - // TODO: Remove unused entries for view_instance_visibility and previous_depth_pyramids - } - - fn queue_meshlet_mesh_upload( - &mut self, - instance: Entity, - render_layers: RenderLayers, - not_shadow_caster: bool, - handle: &Handle, - assets: &mut Assets, - ) { - let queue_meshlet_mesh = |asset_id: &AssetId| { - let meshlet_mesh = assets.remove_untracked(*asset_id).expect( - "MeshletMesh asset was already unloaded but is not registered with MeshletGpuScene", - ); - - let vertex_data_slice = self - .vertex_data - .queue_write(Arc::clone(&meshlet_mesh.vertex_data), ()); - let vertex_ids_slice = self.vertex_ids.queue_write( - Arc::clone(&meshlet_mesh.vertex_ids), - vertex_data_slice.start, - ); - let indices_slice = self - .indices - .queue_write(Arc::clone(&meshlet_mesh.indices), ()); - let meshlets_slice = self.meshlets.queue_write( - Arc::clone(&meshlet_mesh.meshlets), - (vertex_ids_slice.start, indices_slice.start), - ); - let meshlet_bounding_spheres_slice = self - .meshlet_bounding_spheres - .queue_write(Arc::clone(&meshlet_mesh.bounding_spheres), ()); - - ( - [ - vertex_data_slice, - vertex_ids_slice, - indices_slice, - meshlets_slice, - meshlet_bounding_spheres_slice, - ], - meshlet_mesh.worst_case_meshlet_triangles, - ) - }; - - // If the MeshletMesh asset has not been uploaded to the GPU yet, queue it for uploading - let ([_, _, _, meshlets_slice, _], triangle_count) = self - .meshlet_mesh_slices - .entry(handle.id()) - .or_insert_with_key(queue_meshlet_mesh) - .clone(); - - let meshlets_slice = (meshlets_slice.start as u32 / size_of::() as u32) - ..(meshlets_slice.end as u32 / size_of::() as u32); - - // Append instance data for this frame - self.instances - .push((instance, render_layers, not_shadow_caster)); - self.instance_material_ids.get_mut().push(0); - self.instance_meshlet_counts_prefix_sum - .get_mut() - .push(self.scene_meshlet_count); - self.instance_meshlet_slice_starts - .get_mut() - .push(meshlets_slice.start); - - self.scene_meshlet_count += meshlets_slice.end - meshlets_slice.start; - self.scene_triangle_count += triangle_count; - } - - /// Get the depth value for use with the material depth texture for a given [`Material`] asset. - pub fn get_material_id(&mut self, material_id: UntypedAssetId) -> u32 { - *self - .material_id_lookup - .entry(material_id) - .or_insert_with(|| { - self.next_material_id += 1; - self.next_material_id - }) - } - - pub fn material_present_in_scene(&self, material_id: &u32) -> bool { - self.material_ids_present_in_scene.contains(material_id) - } - - pub fn fill_cluster_buffers_bind_group_layout(&self) -> BindGroupLayout { - self.fill_cluster_buffers_bind_group_layout.clone() - } - - pub fn culling_bind_group_layout(&self) -> BindGroupLayout { - self.culling_bind_group_layout.clone() - } - - pub fn downsample_depth_bind_group_layout(&self) -> BindGroupLayout { - self.downsample_depth_bind_group_layout.clone() - } - - pub fn visibility_buffer_raster_bind_group_layout(&self) -> BindGroupLayout { - self.visibility_buffer_raster_bind_group_layout.clone() - } - - pub fn copy_material_depth_bind_group_layout(&self) -> BindGroupLayout { - self.copy_material_depth_bind_group_layout.clone() - } - - pub fn material_draw_bind_group_layout(&self) -> BindGroupLayout { - self.material_draw_bind_group_layout.clone() - } -} - -#[derive(Component)] -pub struct MeshletViewResources { - pub scene_meshlet_count: u32, - pub second_pass_candidates_buffer: Buffer, - instance_visibility: Buffer, - pub visibility_buffer: Option, - pub visibility_buffer_draw_indirect_args_first: Buffer, - pub visibility_buffer_draw_indirect_args_second: Buffer, - visibility_buffer_draw_triangle_buffer: Buffer, - depth_pyramid_all_mips: TextureView, - depth_pyramid_mips: [TextureView; 12], - pub depth_pyramid_mip_count: u32, - previous_depth_pyramid: TextureView, - pub material_depth_color: Option, - pub material_depth: Option, - pub view_size: UVec2, -} - -#[derive(Component)] -pub struct MeshletViewBindGroups { - pub first_node: Arc, - pub fill_cluster_buffers: BindGroup, - pub culling_first: BindGroup, - pub culling_second: BindGroup, - pub downsample_depth: BindGroup, - pub visibility_buffer_raster: BindGroup, - pub copy_material_depth: Option, - pub material_draw: Option, -} diff --git a/crates/bevy_pbr/src/meshlet/instance_manager.rs b/crates/bevy_pbr/src/meshlet/instance_manager.rs new file mode 100644 index 0000000000..0f370f2200 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/instance_manager.rs @@ -0,0 +1,261 @@ +use super::{meshlet_mesh_manager::MeshletMeshManager, MeshletMesh}; +use crate::{ + Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver, + PreviousGlobalTransform, RenderMaterialInstances, +}; +use bevy_asset::{AssetEvent, AssetServer, Assets, Handle, UntypedAssetId}; +use bevy_ecs::{ + entity::{Entities, Entity, EntityHashMap}, + event::EventReader, + query::Has, + system::{Local, Query, Res, ResMut, Resource, SystemState}, +}; +use bevy_render::{render_resource::StorageBuffer, view::RenderLayers, MainWorld}; +use bevy_transform::components::GlobalTransform; +use bevy_utils::{HashMap, HashSet}; +use std::ops::{DerefMut, Range}; + +/// Manages data for each entity with a [`MeshletMesh`]. +#[derive(Resource)] +pub struct InstanceManager { + /// Amount of clusters in the scene (sum of all meshlet counts across all instances) + pub scene_cluster_count: u32, + + /// Per-instance [`Entity`], [`RenderLayers`], and [`NotShadowCaster`] + pub instances: Vec<(Entity, RenderLayers, bool)>, + /// Per-instance [`MeshUniform`] + pub instance_uniforms: StorageBuffer>, + /// Per-instance material ID + pub instance_material_ids: StorageBuffer>, + /// Prefix-sum of meshlet counts per instance + pub instance_meshlet_counts_prefix_sum: StorageBuffer>, + /// Per-instance index to the start of the instance's slice of the meshlets buffer + pub instance_meshlet_slice_starts: StorageBuffer>, + /// Per-view per-instance visibility bit. Used for [`RenderLayers`] and [`NotShadowCaster`] support. + pub view_instance_visibility: EntityHashMap>>, + + /// Next material ID available for a [`Material`] + next_material_id: u32, + /// Map of [`Material`] to material ID + material_id_lookup: HashMap, + /// Set of material IDs used in the scene + material_ids_present_in_scene: HashSet, +} + +impl InstanceManager { + pub fn new() -> Self { + Self { + scene_cluster_count: 0, + + instances: Vec::new(), + instance_uniforms: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_uniforms")); + buffer + }, + instance_material_ids: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_material_ids")); + buffer + }, + instance_meshlet_counts_prefix_sum: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_meshlet_counts_prefix_sum")); + buffer + }, + instance_meshlet_slice_starts: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_meshlet_slice_starts")); + buffer + }, + view_instance_visibility: EntityHashMap::default(), + + next_material_id: 0, + material_id_lookup: HashMap::new(), + material_ids_present_in_scene: HashSet::new(), + } + } + + #[allow(clippy::too_many_arguments)] + pub fn add_instance( + &mut self, + instance: Entity, + meshlets_slice: Range, + transform: &GlobalTransform, + previous_transform: Option<&PreviousGlobalTransform>, + render_layers: Option<&RenderLayers>, + not_shadow_receiver: bool, + not_shadow_caster: bool, + ) { + // Build a MeshUniform for the instance + let transform = transform.affine(); + let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform); + let mut flags = if not_shadow_receiver { + MeshFlags::empty() + } else { + MeshFlags::SHADOW_RECEIVER + }; + if transform.matrix3.determinant().is_sign_positive() { + flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; + } + let transforms = MeshTransforms { + world_from_local: (&transform).into(), + previous_world_from_local: (&previous_transform).into(), + flags: flags.bits(), + }; + let mesh_uniform = MeshUniform::new(&transforms, 0, None); + + // Append instance data + self.instances.push(( + instance, + render_layers.cloned().unwrap_or(RenderLayers::default()), + not_shadow_caster, + )); + self.instance_uniforms.get_mut().push(mesh_uniform); + self.instance_material_ids.get_mut().push(0); + self.instance_meshlet_counts_prefix_sum + .get_mut() + .push(self.scene_cluster_count); + self.instance_meshlet_slice_starts + .get_mut() + .push(meshlets_slice.start); + + self.scene_cluster_count += meshlets_slice.end - meshlets_slice.start; + } + + /// Get the material ID for a [`crate::Material`]. + pub fn get_material_id(&mut self, material_asset_id: UntypedAssetId) -> u32 { + *self + .material_id_lookup + .entry(material_asset_id) + .or_insert_with(|| { + self.next_material_id += 1; + self.next_material_id + }) + } + + pub fn material_present_in_scene(&self, material_id: &u32) -> bool { + self.material_ids_present_in_scene.contains(material_id) + } + + pub fn reset(&mut self, entities: &Entities) { + self.scene_cluster_count = 0; + + self.instances.clear(); + self.instance_uniforms.get_mut().clear(); + self.instance_material_ids.get_mut().clear(); + self.instance_meshlet_counts_prefix_sum.get_mut().clear(); + self.instance_meshlet_slice_starts.get_mut().clear(); + self.view_instance_visibility + .retain(|view_entity, _| entities.contains(*view_entity)); + self.view_instance_visibility + .values_mut() + .for_each(|b| b.get_mut().clear()); + + self.next_material_id = 0; + self.material_id_lookup.clear(); + self.material_ids_present_in_scene.clear(); + } +} + +pub fn extract_meshlet_mesh_entities( + mut meshlet_mesh_manager: ResMut, + mut instance_manager: ResMut, + // TODO: Replace main_world and system_state when Extract>> is possible + mut main_world: ResMut, + mut system_state: Local< + Option< + SystemState<( + Query<( + Entity, + &Handle, + &GlobalTransform, + Option<&PreviousGlobalTransform>, + Option<&RenderLayers>, + Has, + Has, + )>, + Res, + ResMut>, + EventReader>, + &Entities, + )>, + >, + >, +) { + // Get instances query + if system_state.is_none() { + *system_state = Some(SystemState::new(&mut main_world)); + } + let system_state = system_state.as_mut().unwrap(); + let (instances_query, asset_server, mut assets, mut asset_events, entities) = + system_state.get_mut(&mut main_world); + + // Reset per-frame data + instance_manager.reset(entities); + + // Free GPU buffer space for any modified or dropped MeshletMesh assets + for asset_event in asset_events.read() { + if let AssetEvent::Unused { id } | AssetEvent::Modified { id } = asset_event { + meshlet_mesh_manager.remove(id); + } + } + + // Iterate over every instance + for ( + instance, + meshlet_mesh, + transform, + previous_transform, + render_layers, + not_shadow_receiver, + not_shadow_caster, + ) in &instances_query + { + // Skip instances with an unloaded MeshletMesh asset + // TODO: This is a semi-expensive check + if asset_server.is_managed(meshlet_mesh.id()) + && !asset_server.is_loaded_with_dependencies(meshlet_mesh.id()) + { + continue; + } + + // Upload the instance's MeshletMesh asset data if not done already done + let meshlets_slice = + meshlet_mesh_manager.queue_upload_if_needed(meshlet_mesh.id(), &mut assets); + + // Add the instance's data to the instance manager + instance_manager.add_instance( + instance, + meshlets_slice, + transform, + previous_transform, + render_layers, + not_shadow_receiver, + not_shadow_caster, + ); + } +} + +/// For each entity in the scene, record what material ID its material was assigned in the `prepare_material_meshlet_meshes` systems, +/// and note that the material is used by at least one entity in the scene. +pub fn queue_material_meshlet_meshes( + mut instance_manager: ResMut, + render_material_instances: Res>, +) { + let instance_manager = instance_manager.deref_mut(); + + for (i, (instance, _, _)) in instance_manager.instances.iter().enumerate() { + if let Some(material_asset_id) = render_material_instances.get(instance) { + if let Some(material_id) = instance_manager + .material_id_lookup + .get(&material_asset_id.untyped()) + { + instance_manager + .material_ids_present_in_scene + .insert(*material_id); + instance_manager.instance_material_ids.get_mut()[i] = *material_id; + } + } + } +} diff --git a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs b/crates/bevy_pbr/src/meshlet/material_pipeline_prepare.rs similarity index 95% rename from crates/bevy_pbr/src/meshlet/material_draw_prepare.rs rename to crates/bevy_pbr/src/meshlet/material_pipeline_prepare.rs index f572edc0c7..1a5c3e2d56 100644 --- a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs +++ b/crates/bevy_pbr/src/meshlet/material_pipeline_prepare.rs @@ -1,4 +1,7 @@ -use super::{MeshletGpuScene, MESHLET_MESH_MATERIAL_SHADER_HANDLE}; +use super::{ + instance_manager::InstanceManager, resource_manager::ResourceManager, + MESHLET_MESH_MATERIAL_SHADER_HANDLE, +}; use crate::{environment_map::EnvironmentMapLight, irradiance_volume::IrradianceVolume, *}; use bevy_asset::AssetServer; use bevy_core_pipeline::{ @@ -22,10 +25,11 @@ use std::hash::Hash; pub struct MeshletViewMaterialsMainOpaquePass(pub Vec<(u32, CachedRenderPipelineId, BindGroup)>); /// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletMainOpaquePass3dNode`], -/// and register the material with [`MeshletGpuScene`]. +/// and register the material with [`InstanceManager`]. #[allow(clippy::too_many_arguments)] pub fn prepare_material_meshlet_meshes_main_opaque_pass( - mut gpu_scene: ResMut, + resource_manager: ResMut, + mut instance_manager: ResMut, mut cache: Local>, pipeline_cache: Res, material_pipeline: Res>, @@ -167,7 +171,7 @@ pub fn prepare_material_meshlet_meshes_main_opaque_pass( label: material_pipeline_descriptor.label, layout: vec![ mesh_pipeline.get_view_layout(view_key.into()).clone(), - gpu_scene.material_draw_bind_group_layout(), + resource_manager.material_shade_bind_group_layout.clone(), material_pipeline.material_layout.clone(), ], push_constant_ranges: vec![], @@ -198,7 +202,7 @@ pub fn prepare_material_meshlet_meshes_main_opaque_pass( }), }; - let material_id = gpu_scene.get_material_id(material_id.untyped()); + let material_id = instance_manager.get_material_id(material_id.untyped()); let pipeline_id = *cache.entry(view_key).or_insert_with(|| { pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) @@ -219,10 +223,11 @@ pub struct MeshletViewMaterialsDeferredGBufferPrepass( ); /// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletPrepassNode`], -/// and [`super::MeshletDeferredGBufferPrepassNode`] and register the material with [`MeshletGpuScene`]. +/// and [`super::MeshletDeferredGBufferPrepassNode`] and register the material with [`InstanceManager`]. #[allow(clippy::too_many_arguments)] pub fn prepare_material_meshlet_meshes_prepass( - mut gpu_scene: ResMut, + resource_manager: ResMut, + mut instance_manager: ResMut, mut cache: Local>, pipeline_cache: Res, prepass_pipeline: Res>, @@ -319,7 +324,7 @@ pub fn prepare_material_meshlet_meshes_prepass( label: material_pipeline_descriptor.label, layout: vec![ view_layout, - gpu_scene.material_draw_bind_group_layout(), + resource_manager.material_shade_bind_group_layout.clone(), prepass_pipeline.material_layout.clone(), ], push_constant_ranges: vec![], @@ -350,7 +355,7 @@ pub fn prepare_material_meshlet_meshes_prepass( }), }; - let material_id = gpu_scene.get_material_id(material_id.untyped()); + let material_id = instance_manager.get_material_id(material_id.untyped()); let pipeline_id = *cache.entry(view_key).or_insert_with(|| { pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) diff --git a/crates/bevy_pbr/src/meshlet/material_draw_nodes.rs b/crates/bevy_pbr/src/meshlet/material_shade_nodes.rs similarity index 91% rename from crates/bevy_pbr/src/meshlet/material_draw_nodes.rs rename to crates/bevy_pbr/src/meshlet/material_shade_nodes.rs index e7b71ea253..9c2d432d88 100644 --- a/crates/bevy_pbr/src/meshlet/material_draw_nodes.rs +++ b/crates/bevy_pbr/src/meshlet/material_shade_nodes.rs @@ -1,10 +1,10 @@ use super::{ - gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, - material_draw_prepare::{ + material_pipeline_prepare::{ MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, MeshletViewMaterialsPrepass, }, - MeshletGpuScene, + resource_manager::{MeshletViewBindGroups, MeshletViewResources}, + InstanceManager, }; use crate::{ MeshViewBindGroup, PrepassViewBindGroup, ViewEnvironmentMapUniformOffset, ViewFogUniformOffset, @@ -72,15 +72,15 @@ impl ViewNode for MeshletMainOpaquePass3dNode { } let ( - Some(meshlet_gpu_scene), + Some(instance_manager), Some(pipeline_cache), Some(meshlet_material_depth), - Some(meshlet_material_draw_bind_group), + Some(meshlet_material_shade_bind_group), ) = ( - world.get_resource::(), + world.get_resource::(), world.get_resource::(), meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.material_draw.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), ) else { return Ok(()); @@ -116,13 +116,13 @@ impl ViewNode for MeshletMainOpaquePass3dNode { **view_environment_map_offset, ], ); - render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + render_pass.set_bind_group(1, meshlet_material_shade_bind_group, &[]); // 1 fullscreen triangle draw per material for (material_id, material_pipeline_id, material_bind_group) in meshlet_view_materials.iter() { - if meshlet_gpu_scene.material_present_in_scene(material_id) { + if instance_manager.material_present_in_scene(material_id) { if let Some(material_pipeline) = pipeline_cache.get_render_pipeline(*material_pipeline_id) { @@ -175,16 +175,16 @@ impl ViewNode for MeshletPrepassNode { let ( Some(prepass_view_bind_group), - Some(meshlet_gpu_scene), + Some(instance_manager), Some(pipeline_cache), Some(meshlet_material_depth), - Some(meshlet_material_draw_bind_group), + Some(meshlet_material_shade_bind_group), ) = ( world.get_resource::(), - world.get_resource::(), + world.get_resource::(), world.get_resource::(), meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.material_draw.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), ) else { return Ok(()); @@ -239,13 +239,13 @@ impl ViewNode for MeshletPrepassNode { ); } - render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + render_pass.set_bind_group(1, meshlet_material_shade_bind_group, &[]); // 1 fullscreen triangle draw per material for (material_id, material_pipeline_id, material_bind_group) in meshlet_view_materials.iter() { - if meshlet_gpu_scene.material_present_in_scene(material_id) { + if instance_manager.material_present_in_scene(material_id) { if let Some(material_pipeline) = pipeline_cache.get_render_pipeline(*material_pipeline_id) { @@ -298,16 +298,16 @@ impl ViewNode for MeshletDeferredGBufferPrepassNode { let ( Some(prepass_view_bind_group), - Some(meshlet_gpu_scene), + Some(instance_manager), Some(pipeline_cache), Some(meshlet_material_depth), - Some(meshlet_material_draw_bind_group), + Some(meshlet_material_shade_bind_group), ) = ( world.get_resource::(), - world.get_resource::(), + world.get_resource::(), world.get_resource::(), meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.material_draw.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), ) else { return Ok(()); @@ -367,13 +367,13 @@ impl ViewNode for MeshletDeferredGBufferPrepassNode { ); } - render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + render_pass.set_bind_group(1, meshlet_material_shade_bind_group, &[]); // 1 fullscreen triangle draw per material for (material_id, material_pipeline_id, material_bind_group) in meshlet_view_materials.iter() { - if meshlet_gpu_scene.material_present_in_scene(material_id) { + if instance_manager.material_present_in_scene(material_id) { if let Some(material_pipeline) = pipeline_cache.get_render_pipeline(*material_pipeline_id) { diff --git a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl index a3f18cbc9b..f70252b28e 100644 --- a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl +++ b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl @@ -30,6 +30,7 @@ fn unpack_meshlet_vertex(packed: PackedMeshletVertex) -> MeshletVertex { struct Meshlet { start_vertex_id: u32, start_index_id: u32, + vertex_count: u32, triangle_count: u32, } @@ -44,9 +45,15 @@ struct MeshletBoundingSphere { radius: f32, } +struct DispatchIndirectArgs { + x: atomic, + y: u32, + z: u32, +} + struct DrawIndirectArgs { - vertex_count: atomic, - instance_count: u32, + vertex_count: u32, + instance_count: atomic, first_vertex: u32, first_instance: u32, } @@ -60,15 +67,16 @@ var cluster_count: u32; #endif #ifdef MESHLET_CULLING_PASS +var meshlet_raster_cluster_rightmost_slot: u32; @group(0) @binding(0) var meshlet_cluster_meshlet_ids: array; // Per cluster @group(0) @binding(1) var meshlet_bounding_spheres: array; // Per meshlet @group(0) @binding(2) var meshlet_cluster_instance_ids: array; // Per cluster @group(0) @binding(3) var meshlet_instance_uniforms: array; // Per entity instance @group(0) @binding(4) var meshlet_view_instance_visibility: array; // 1 bit per entity instance, packed as a bitmask @group(0) @binding(5) var meshlet_second_pass_candidates: array>; // 1 bit per cluster , packed as a bitmask -@group(0) @binding(6) var meshlets: array; // Per meshlet -@group(0) @binding(7) var draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles -@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(6) var meshlet_software_raster_indirect_args: DispatchIndirectArgs; // Single object shared between all workgroups/clusters/triangles +@group(0) @binding(7) var meshlet_hardware_raster_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/clusters/triangles +@group(0) @binding(8) var meshlet_raster_clusters: array; // Single object shared between all workgroups/clusters/triangles @group(0) @binding(9) var depth_pyramid: texture_2d; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass @group(0) @binding(10) var view: View; @group(0) @binding(11) var previous_view: PreviousViewUniforms; @@ -79,6 +87,7 @@ fn should_cull_instance(instance_id: u32) -> bool { return bool(extractBits(packed_visibility, bit_offset, 1u)); } +// TODO: Load 4x per workgroup instead of once per thread? fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool { let packed_candidates = meshlet_second_pass_candidates[cluster_id / 32u]; let bit_offset = cluster_id % 32u; @@ -94,10 +103,16 @@ fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool { @group(0) @binding(4) var meshlet_vertex_data: array; // Many per meshlet @group(0) @binding(5) var meshlet_cluster_instance_ids: array; // Per cluster @group(0) @binding(6) var meshlet_instance_uniforms: array; // Per entity instance -@group(0) @binding(7) var meshlet_instance_material_ids: array; // Per entity instance -@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles -@group(0) @binding(9) var view: View; +@group(0) @binding(7) var meshlet_raster_clusters: array; // Single object shared between all workgroups/clusters/triangles +@group(0) @binding(8) var meshlet_software_raster_cluster_count: u32; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(9) var meshlet_visibility_buffer: array>; // Per pixel +#else +@group(0) @binding(9) var meshlet_visibility_buffer: array>; // Per pixel +#endif +@group(0) @binding(10) var view: View; +// TODO: Load only twice, instead of 3x in cases where you load 3 indices per thread? fn get_meshlet_index(index_id: u32) -> u32 { let packed_index = meshlet_indices[index_id / 4u]; let bit_offset = (index_id % 4u) * 8u; @@ -106,7 +121,7 @@ fn get_meshlet_index(index_id: u32) -> u32 { #endif #ifdef MESHLET_MESH_MATERIAL_PASS -@group(1) @binding(0) var meshlet_visibility_buffer: texture_2d; // Generated from the meshlet raster passes +@group(1) @binding(0) var meshlet_visibility_buffer: array; // Per pixel @group(1) @binding(1) var meshlet_cluster_meshlet_ids: array; // Per cluster @group(1) @binding(2) var meshlets: array; // Per meshlet @group(1) @binding(3) var meshlet_indices: array; // Many per meshlet @@ -115,6 +130,7 @@ fn get_meshlet_index(index_id: u32) -> u32 { @group(1) @binding(6) var meshlet_cluster_instance_ids: array; // Per cluster @group(1) @binding(7) var meshlet_instance_uniforms: array; // Per entity instance +// TODO: Load only twice, instead of 3x in cases where you load 3 indices per thread? fn get_meshlet_index(index_id: u32) -> u32 { let packed_index = meshlet_indices[index_id / 4u]; let bit_offset = (index_id % 4u) * 8u; diff --git a/crates/bevy_pbr/src/meshlet/meshlet_mesh_manager.rs b/crates/bevy_pbr/src/meshlet/meshlet_mesh_manager.rs new file mode 100644 index 0000000000..03855ec039 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/meshlet_mesh_manager.rs @@ -0,0 +1,132 @@ +use super::{ + asset::{Meshlet, MeshletBoundingSpheres}, + persistent_buffer::PersistentGpuBuffer, + MeshletMesh, +}; +use bevy_asset::{AssetId, Assets}; +use bevy_ecs::{ + system::{Res, ResMut, Resource}, + world::{FromWorld, World}, +}; +use bevy_render::{ + render_resource::BufferAddress, + renderer::{RenderDevice, RenderQueue}, +}; +use bevy_utils::HashMap; +use std::{mem::size_of, ops::Range, sync::Arc}; + +/// Manages uploading [`MeshletMesh`] asset data to the GPU. +#[derive(Resource)] +pub struct MeshletMeshManager { + pub vertex_data: PersistentGpuBuffer>, + pub vertex_ids: PersistentGpuBuffer>, + pub indices: PersistentGpuBuffer>, + pub meshlets: PersistentGpuBuffer>, + pub meshlet_bounding_spheres: PersistentGpuBuffer>, + meshlet_mesh_slices: HashMap, [Range; 5]>, +} + +impl FromWorld for MeshletMeshManager { + fn from_world(world: &mut World) -> Self { + let render_device = world.resource::(); + Self { + vertex_data: PersistentGpuBuffer::new("meshlet_vertex_data", render_device), + vertex_ids: PersistentGpuBuffer::new("meshlet_vertex_ids", render_device), + indices: PersistentGpuBuffer::new("meshlet_indices", render_device), + meshlets: PersistentGpuBuffer::new("meshlets", render_device), + meshlet_bounding_spheres: PersistentGpuBuffer::new( + "meshlet_bounding_spheres", + render_device, + ), + meshlet_mesh_slices: HashMap::new(), + } + } +} + +impl MeshletMeshManager { + pub fn queue_upload_if_needed( + &mut self, + asset_id: AssetId, + assets: &mut Assets, + ) -> Range { + let queue_meshlet_mesh = |asset_id: &AssetId| { + let meshlet_mesh = assets.remove_untracked(*asset_id).expect( + "MeshletMesh asset was already unloaded but is not registered with MeshletMeshManager", + ); + + let vertex_data_slice = self + .vertex_data + .queue_write(Arc::clone(&meshlet_mesh.vertex_data), ()); + let vertex_ids_slice = self.vertex_ids.queue_write( + Arc::clone(&meshlet_mesh.vertex_ids), + vertex_data_slice.start, + ); + let indices_slice = self + .indices + .queue_write(Arc::clone(&meshlet_mesh.indices), ()); + let meshlets_slice = self.meshlets.queue_write( + Arc::clone(&meshlet_mesh.meshlets), + (vertex_ids_slice.start, indices_slice.start), + ); + let meshlet_bounding_spheres_slice = self + .meshlet_bounding_spheres + .queue_write(Arc::clone(&meshlet_mesh.bounding_spheres), ()); + + [ + vertex_data_slice, + vertex_ids_slice, + indices_slice, + meshlets_slice, + meshlet_bounding_spheres_slice, + ] + }; + + // If the MeshletMesh asset has not been uploaded to the GPU yet, queue it for uploading + let [_, _, _, meshlets_slice, _] = self + .meshlet_mesh_slices + .entry(asset_id) + .or_insert_with_key(queue_meshlet_mesh) + .clone(); + + let meshlets_slice_start = meshlets_slice.start as u32 / size_of::() as u32; + let meshlets_slice_end = meshlets_slice.end as u32 / size_of::() as u32; + meshlets_slice_start..meshlets_slice_end + } + + pub fn remove(&mut self, asset_id: &AssetId) { + if let Some( + [vertex_data_slice, vertex_ids_slice, indices_slice, meshlets_slice, meshlet_bounding_spheres_slice], + ) = self.meshlet_mesh_slices.remove(asset_id) + { + self.vertex_data.mark_slice_unused(vertex_data_slice); + self.vertex_ids.mark_slice_unused(vertex_ids_slice); + self.indices.mark_slice_unused(indices_slice); + self.meshlets.mark_slice_unused(meshlets_slice); + self.meshlet_bounding_spheres + .mark_slice_unused(meshlet_bounding_spheres_slice); + } + } +} + +/// Upload all newly queued [`MeshletMesh`] asset data to the GPU. +pub fn perform_pending_meshlet_mesh_writes( + mut meshlet_mesh_manager: ResMut, + render_queue: Res, + render_device: Res, +) { + meshlet_mesh_manager + .vertex_data + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .vertex_ids + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .indices + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .meshlets + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .meshlet_bounding_spheres + .perform_writes(&render_queue, &render_device); +} diff --git a/crates/bevy_pbr/src/meshlet/mod.rs b/crates/bevy_pbr/src/meshlet/mod.rs index a55dc42247..d61dc05c56 100644 --- a/crates/bevy_pbr/src/meshlet/mod.rs +++ b/crates/bevy_pbr/src/meshlet/mod.rs @@ -3,12 +3,14 @@ mod asset; #[cfg(feature = "meshlet_processor")] mod from_mesh; -mod gpu_scene; -mod material_draw_nodes; -mod material_draw_prepare; +mod instance_manager; +mod material_pipeline_prepare; +mod material_shade_nodes; +mod meshlet_mesh_manager; mod persistent_buffer; mod persistent_buffer_impls; mod pipelines; +mod resource_manager; mod visibility_buffer_raster_node; pub mod graph { @@ -24,8 +26,8 @@ pub mod graph { } pub(crate) use self::{ - gpu_scene::{queue_material_meshlet_meshes, MeshletGpuScene}, - material_draw_prepare::{ + instance_manager::{queue_material_meshlet_meshes, InstanceManager}, + material_pipeline_prepare::{ prepare_material_meshlet_meshes_main_opaque_pass, prepare_material_meshlet_meshes_prepass, }, }; @@ -35,22 +37,19 @@ pub use self::asset::{MeshletMesh, MeshletMeshSaverLoader}; pub use self::from_mesh::MeshToMeshletMeshConversionError; use self::{ - gpu_scene::{ - extract_meshlet_meshes, perform_pending_meshlet_mesh_writes, - prepare_meshlet_per_frame_resources, prepare_meshlet_view_bind_groups, - }, graph::NodeMeshlet, - material_draw_nodes::{ - MeshletDeferredGBufferPrepassNode, MeshletMainOpaquePass3dNode, MeshletPrepassNode, - }, - material_draw_prepare::{ + instance_manager::extract_meshlet_mesh_entities, + material_pipeline_prepare::{ MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, MeshletViewMaterialsPrepass, }, - pipelines::{ - MeshletPipelines, MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, MESHLET_CULLING_SHADER_HANDLE, - MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, MESHLET_FILL_CLUSTER_BUFFERS_SHADER_HANDLE, - MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + material_shade_nodes::{ + MeshletDeferredGBufferPrepassNode, MeshletMainOpaquePass3dNode, MeshletPrepassNode, + }, + meshlet_mesh_manager::{perform_pending_meshlet_mesh_writes, MeshletMeshManager}, + pipelines::*, + resource_manager::{ + prepare_meshlet_per_frame_resources, prepare_meshlet_view_bind_groups, ResourceManager, }, visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode, }; @@ -58,10 +57,7 @@ use crate::{graph::NodePbr, Material}; use bevy_app::{App, Plugin, PostUpdate}; use bevy_asset::{load_internal_asset, AssetApp, Handle}; use bevy_core_pipeline::{ - core_3d::{ - graph::{Core3d, Node3d}, - Camera3d, - }, + core_3d::graph::{Core3d, Node3d}, prepass::{DeferredPrepass, MotionVectorPrepass, NormalPrepass}, }; use bevy_ecs::{ @@ -74,7 +70,7 @@ use bevy_ecs::{ }; use bevy_render::{ render_graph::{RenderGraphApp, ViewNodeRunner}, - render_resource::{Shader, TextureUsages}, + render_resource::Shader, renderer::RenderDevice, settings::WgpuFeatures, view::{ @@ -84,6 +80,7 @@ use bevy_render::{ ExtractSchedule, Render, RenderApp, RenderSet, }; use bevy_transform::components::{GlobalTransform, Transform}; +use bevy_utils::tracing::error; const MESHLET_BINDINGS_SHADER_HANDLE: Handle = Handle::weak_from_u128(1325134235233421); const MESHLET_MESH_MATERIAL_SHADER_HANDLE: Handle = @@ -96,26 +93,46 @@ const MESHLET_MESH_MATERIAL_SHADER_HANDLE: Handle = /// /// In comparison to Bevy's standard renderer: /// * Much more efficient culling. Meshlets can be culled individually, instead of all or nothing culling for entire meshes at a time. -/// Additionally, occlusion culling can eliminate meshlets that would cause overdraw. -/// * Much more efficient batching. All geometry can be rasterized in a single indirect draw. +/// Additionally, occlusion culling can eliminate meshlets that would cause overdraw. +/// * Much more efficient batching. All geometry can be rasterized in a single draw. /// * Scales better with large amounts of dense geometry and overdraw. Bevy's standard renderer will bottleneck sooner. /// * Near-seamless level of detail (LOD). -/// * Much greater base overhead. Rendering will be slower than Bevy's standard renderer with small amounts of geometry and overdraw. -/// * Much greater memory usage. +/// * Much greater base overhead. Rendering will be slower and use more memory than Bevy's standard renderer +/// with small amounts of geometry and overdraw. /// * Requires preprocessing meshes. See [`MeshletMesh`] for details. /// * Limitations on the kinds of materials you can use. See [`MeshletMesh`] for details. /// +/// This plugin requires a fairly recent GPU that supports [`WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX`]. +/// +/// This plugin currently works only on the Vulkan backend. +/// /// This plugin is not compatible with [`Msaa`]. Any camera rendering a [`MeshletMesh`] must have /// [`Msaa`] set to [`Msaa::Off`]. /// -/// This plugin does not work on Wasm. -/// /// Mixing forward+prepass and deferred rendering for opaque materials is not currently supported when using this plugin. /// You must use one or the other by setting [`crate::DefaultOpaqueRendererMethod`]. /// Do not override [`crate::Material::opaque_render_method`] for any material when using this plugin. /// /// ![A render of the Stanford dragon as a `MeshletMesh`](https://raw.githubusercontent.com/bevyengine/bevy/main/crates/bevy_pbr/src/meshlet/meshlet_preview.png) -pub struct MeshletPlugin; +pub struct MeshletPlugin { + /// The maximum amount of clusters that can be processed at once, + /// used to control the size of a pre-allocated GPU buffer. + /// + /// If this number is too low, you'll see rendering artifacts like missing or blinking meshes. + /// + /// Each cluster slot costs 4 bytes of VRAM. + pub cluster_buffer_slots: u32, +} + +impl MeshletPlugin { + /// [`WgpuFeatures`] required for this plugin to function. + pub fn required_wgpu_features() -> WgpuFeatures { + WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX + | WgpuFeatures::SHADER_INT64 + | WgpuFeatures::SUBGROUP + | WgpuFeatures::PUSH_CONSTANTS + } +} impl Plugin for MeshletPlugin { fn build(&self, app: &mut App) { @@ -154,8 +171,14 @@ impl Plugin for MeshletPlugin { ); load_internal_asset!( app, - MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, - "visibility_buffer_raster.wgsl", + MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + "visibility_buffer_software_raster.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, + "visibility_buffer_hardware_raster.wgsl", Shader::from_wgsl ); load_internal_asset!( @@ -166,8 +189,14 @@ impl Plugin for MeshletPlugin { ); load_internal_asset!( app, - MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, - "copy_material_depth.wgsl", + MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + "resolve_render_targets.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE, + "remap_1d_to_2d_dispatch.wgsl", Shader::from_wgsl ); @@ -184,13 +213,14 @@ impl Plugin for MeshletPlugin { return; }; - if !render_app - .world() - .resource::() - .features() - .contains(WgpuFeatures::PUSH_CONSTANTS) - { - panic!("MeshletPlugin can't be used. GPU lacks support: WgpuFeatures::PUSH_CONSTANTS is not supported."); + let render_device = render_app.world().resource::().clone(); + let features = render_device.features(); + if !features.contains(Self::required_wgpu_features()) { + error!( + "MeshletPlugin can't be used. GPU lacks support for required features: {:?}.", + Self::required_wgpu_features().difference(features) + ); + std::process::exit(1); } render_app @@ -213,24 +243,31 @@ impl Plugin for MeshletPlugin { .add_render_graph_edges( Core3d, ( - // Non-meshlet shading passes _must_ come before meshlet shading passes - NodePbr::ShadowPass, NodeMeshlet::VisibilityBufferRasterPass, + NodePbr::ShadowPass, + // NodeMeshlet::Prepass, Node3d::Prepass, + // NodeMeshlet::DeferredPrepass, Node3d::DeferredPrepass, Node3d::CopyDeferredLightingId, Node3d::EndPrepasses, + // Node3d::StartMainPass, NodeMeshlet::MainOpaquePass, Node3d::MainOpaquePass, Node3d::EndMainPass, ), ) - .init_resource::() + .init_resource::() + .insert_resource(InstanceManager::new()) + .insert_resource(ResourceManager::new( + self.cluster_buffer_slots, + &render_device, + )) .init_resource::() - .add_systems(ExtractSchedule, extract_meshlet_meshes) + .add_systems(ExtractSchedule, extract_meshlet_mesh_entities) .add_systems( Render, ( @@ -281,7 +318,6 @@ pub type WithMeshletMesh = With>; fn configure_meshlet_views( mut views_3d: Query<( Entity, - &mut Camera3d, &Msaa, Has, Has, @@ -289,17 +325,12 @@ fn configure_meshlet_views( )>, mut commands: Commands, ) { - for (entity, mut camera_3d, msaa, normal_prepass, motion_vector_prepass, deferred_prepass) in - &mut views_3d - { + for (entity, msaa, normal_prepass, motion_vector_prepass, deferred_prepass) in &mut views_3d { if *msaa != Msaa::Off { - panic!("MeshletPlugin can't be used. MSAA is not supported."); + error!("MeshletPlugin can't be used with MSAA. Add Msaa::Off to your camera to use this plugin."); + std::process::exit(1); } - let mut usages: TextureUsages = camera_3d.depth_texture_usages.into(); - usages |= TextureUsages::TEXTURE_BINDING; - camera_3d.depth_texture_usages = usages.into(); - if !(normal_prepass || motion_vector_prepass || deferred_prepass) { commands .entity(entity) diff --git a/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs b/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs index 64b6861260..da341c285e 100644 --- a/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs +++ b/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs @@ -58,6 +58,7 @@ impl PersistentGpuBufferable for Arc<[Meshlet]> { let bytes = bytemuck::cast::<_, [u8; size_of::()]>(Meshlet { start_vertex_id: meshlet.start_vertex_id + vertex_offset, start_index_id: meshlet.start_index_id + index_offset, + vertex_count: meshlet.vertex_count, triangle_count: meshlet.triangle_count, }); buffer_slice[i..(i + size)].clone_from_slice(&bytes); diff --git a/crates/bevy_pbr/src/meshlet/pipelines.rs b/crates/bevy_pbr/src/meshlet/pipelines.rs index 0112dcbb67..69f10e015e 100644 --- a/crates/bevy_pbr/src/meshlet/pipelines.rs +++ b/crates/bevy_pbr/src/meshlet/pipelines.rs @@ -1,4 +1,4 @@ -use super::gpu_scene::MeshletGpuScene; +use super::resource_manager::ResourceManager; use bevy_asset::Handle; use bevy_core_pipeline::{ core_3d::CORE_3D_DEPTH_FORMAT, fullscreen_vertex_shader::fullscreen_shader_vertex_state, @@ -14,10 +14,14 @@ pub const MESHLET_FILL_CLUSTER_BUFFERS_SHADER_HANDLE: Handle = pub const MESHLET_CULLING_SHADER_HANDLE: Handle = Handle::weak_from_u128(5325134235233421); pub const MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = Handle::weak_from_u128(6325134235233421); -pub const MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE: Handle = +pub const MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE: Handle = Handle::weak_from_u128(7325134235233421); -pub const MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE: Handle = +pub const MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE: Handle = Handle::weak_from_u128(8325134235233421); +pub const MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE: Handle = + Handle::weak_from_u128(9325134235233421); +pub const MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE: Handle = + Handle::weak_from_u128(9425134235233421); #[derive(Resource)] pub struct MeshletPipelines { @@ -26,21 +30,38 @@ pub struct MeshletPipelines { cull_second: CachedComputePipelineId, downsample_depth_first: CachedComputePipelineId, downsample_depth_second: CachedComputePipelineId, - visibility_buffer_raster: CachedRenderPipelineId, - visibility_buffer_raster_depth_only: CachedRenderPipelineId, - visibility_buffer_raster_depth_only_clamp_ortho: CachedRenderPipelineId, - copy_material_depth: CachedRenderPipelineId, + downsample_depth_first_shadow_view: CachedComputePipelineId, + downsample_depth_second_shadow_view: CachedComputePipelineId, + visibility_buffer_software_raster: CachedComputePipelineId, + visibility_buffer_software_raster_depth_only: CachedComputePipelineId, + visibility_buffer_software_raster_depth_only_clamp_ortho: CachedComputePipelineId, + visibility_buffer_hardware_raster: CachedRenderPipelineId, + visibility_buffer_hardware_raster_depth_only: CachedRenderPipelineId, + visibility_buffer_hardware_raster_depth_only_clamp_ortho: CachedRenderPipelineId, + resolve_depth: CachedRenderPipelineId, + resolve_depth_shadow_view: CachedRenderPipelineId, + resolve_material_depth: CachedRenderPipelineId, + remap_1d_to_2d_dispatch: Option, } impl FromWorld for MeshletPipelines { fn from_world(world: &mut World) -> Self { - let gpu_scene = world.resource::(); - let fill_cluster_buffers_bind_group_layout = - gpu_scene.fill_cluster_buffers_bind_group_layout(); - let cull_layout = gpu_scene.culling_bind_group_layout(); - let downsample_depth_layout = gpu_scene.downsample_depth_bind_group_layout(); - let visibility_buffer_layout = gpu_scene.visibility_buffer_raster_bind_group_layout(); - let copy_material_depth_layout = gpu_scene.copy_material_depth_bind_group_layout(); + let resource_manager = world.resource::(); + let fill_cluster_buffers_bind_group_layout = resource_manager + .fill_cluster_buffers_bind_group_layout + .clone(); + let cull_layout = resource_manager.culling_bind_group_layout.clone(); + let downsample_depth_layout = resource_manager.downsample_depth_bind_group_layout.clone(); + let visibility_buffer_raster_layout = resource_manager + .visibility_buffer_raster_bind_group_layout + .clone(); + let resolve_depth_layout = resource_manager.resolve_depth_bind_group_layout.clone(); + let resolve_material_depth_layout = resource_manager + .resolve_material_depth_bind_group_layout + .clone(); + let remap_1d_to_2d_dispatch_layout = resource_manager + .remap_1d_to_2d_dispatch_bind_group_layout + .clone(); let pipeline_cache = world.resource_mut::(); Self { @@ -61,7 +82,10 @@ impl FromWorld for MeshletPipelines { cull_first: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { label: Some("meshlet_culling_first_pipeline".into()), layout: vec![cull_layout.clone()], - push_constant_ranges: vec![], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], shader: MESHLET_CULLING_SHADER_HANDLE, shader_defs: vec![ "MESHLET_CULLING_PASS".into(), @@ -73,7 +97,10 @@ impl FromWorld for MeshletPipelines { cull_second: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { label: Some("meshlet_culling_second_pipeline".into()), layout: vec![cull_layout], - push_constant_ranges: vec![], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], shader: MESHLET_CULLING_SHADER_HANDLE, shader_defs: vec![ "MESHLET_CULLING_PASS".into(), @@ -88,21 +115,49 @@ impl FromWorld for MeshletPipelines { layout: vec![downsample_depth_layout.clone()], push_constant_ranges: vec![PushConstantRange { stages: ShaderStages::COMPUTE, - range: 0..4, + range: 0..8, }], shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, - shader_defs: vec![], + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], entry_point: "downsample_depth_first".into(), }, ), downsample_depth_second: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_second_pipeline".into()), + layout: vec![downsample_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: "downsample_depth_second".into(), + }, + ), + + downsample_depth_first_shadow_view: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_first_pipeline".into()), + layout: vec![downsample_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "downsample_depth_first".into(), + }, + ), + + downsample_depth_second_shadow_view: pipeline_cache.queue_compute_pipeline( ComputePipelineDescriptor { label: Some("meshlet_downsample_depth_second_pipeline".into()), layout: vec![downsample_depth_layout], push_constant_ranges: vec![PushConstantRange { stages: ShaderStages::COMPUTE, - range: 0..4, + range: 0..8, }], shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, shader_defs: vec![], @@ -110,13 +165,79 @@ impl FromWorld for MeshletPipelines { }, ), - visibility_buffer_raster: pipeline_cache.queue_render_pipeline( - RenderPipelineDescriptor { - label: Some("meshlet_visibility_buffer_raster_pipeline".into()), - layout: vec![visibility_buffer_layout.clone()], + visibility_buffer_software_raster: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_visibility_buffer_software_raster_pipeline".into()), + layout: vec![visibility_buffer_raster_layout.clone()], push_constant_ranges: vec![], + shader: MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + entry_point: "rasterize_cluster".into(), + }, + ), + + visibility_buffer_software_raster_depth_only: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some( + "meshlet_visibility_buffer_software_raster_depth_only_pipeline".into(), + ), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![], + shader: MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + entry_point: "rasterize_cluster".into(), + }, + ), + + visibility_buffer_software_raster_depth_only_clamp_ortho: pipeline_cache + .queue_compute_pipeline(ComputePipelineDescriptor { + label: Some( + "meshlet_visibility_buffer_software_raster_depth_only_clamp_ortho_pipeline" + .into(), + ), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![], + shader: MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "DEPTH_CLAMP_ORTHO".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + entry_point: "rasterize_cluster".into(), + }), + + visibility_buffer_hardware_raster: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_visibility_buffer_hardware_raster_pipeline".into()), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], vertex: VertexState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec![ "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), @@ -133,44 +254,36 @@ impl FromWorld for MeshletPipelines { polygon_mode: PolygonMode::Fill, conservative: false, }, - depth_stencil: Some(DepthStencilState { - format: CORE_3D_DEPTH_FORMAT, - depth_write_enabled: true, - depth_compare: CompareFunction::GreaterEqual, - stencil: StencilState::default(), - bias: DepthBiasState::default(), - }), + depth_stencil: None, multisample: MultisampleState::default(), fragment: Some(FragmentState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec![ "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), ], entry_point: "fragment".into(), - targets: vec![ - Some(ColorTargetState { - format: TextureFormat::R32Uint, - blend: None, - write_mask: ColorWrites::ALL, - }), - Some(ColorTargetState { - format: TextureFormat::R16Uint, - blend: None, - write_mask: ColorWrites::ALL, - }), - ], + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], }), }, ), - visibility_buffer_raster_depth_only: pipeline_cache.queue_render_pipeline( + visibility_buffer_hardware_raster_depth_only: pipeline_cache.queue_render_pipeline( RenderPipelineDescriptor { - label: Some("meshlet_visibility_buffer_raster_depth_only_pipeline".into()), - layout: vec![visibility_buffer_layout.clone()], - push_constant_ranges: vec![], + label: Some( + "meshlet_visibility_buffer_hardware_raster_depth_only_pipeline".into(), + ), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], vertex: VertexState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], entry_point: "vertex".into(), buffers: vec![], @@ -184,27 +297,34 @@ impl FromWorld for MeshletPipelines { polygon_mode: PolygonMode::Fill, conservative: false, }, - depth_stencil: Some(DepthStencilState { - format: CORE_3D_DEPTH_FORMAT, - depth_write_enabled: true, - depth_compare: CompareFunction::GreaterEqual, - stencil: StencilState::default(), - bias: DepthBiasState::default(), - }), + depth_stencil: None, multisample: MultisampleState::default(), - fragment: None, + fragment: Some(FragmentState { + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], + entry_point: "fragment".into(), + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], + }), }, ), - visibility_buffer_raster_depth_only_clamp_ortho: pipeline_cache.queue_render_pipeline( - RenderPipelineDescriptor { + visibility_buffer_hardware_raster_depth_only_clamp_ortho: pipeline_cache + .queue_render_pipeline(RenderPipelineDescriptor { label: Some( - "meshlet_visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into(), + "meshlet_visibility_buffer_hardware_raster_depth_only_clamp_ortho_pipeline" + .into(), ), - layout: vec![visibility_buffer_layout], - push_constant_ranges: vec![], + layout: vec![visibility_buffer_raster_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], vertex: VertexState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec![ "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), "DEPTH_CLAMP_ORTHO".into(), @@ -221,6 +341,58 @@ impl FromWorld for MeshletPipelines { polygon_mode: PolygonMode::Fill, conservative: false, }, + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "DEPTH_CLAMP_ORTHO".into(), + ], + entry_point: "fragment".into(), + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], + }), + }), + + resolve_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("meshlet_resolve_depth_pipeline".into()), + layout: vec![resolve_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::FRAGMENT, + range: 0..4, + }], + vertex: fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: "resolve_depth".into(), + targets: vec![], + }), + }), + + resolve_depth_shadow_view: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_resolve_depth_pipeline".into()), + layout: vec![resolve_depth_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::FRAGMENT, + range: 0..4, + }], + vertex: fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), depth_stencil: Some(DepthStencilState { format: CORE_3D_DEPTH_FORMAT, depth_write_enabled: true, @@ -230,37 +402,50 @@ impl FromWorld for MeshletPipelines { }), multisample: MultisampleState::default(), fragment: Some(FragmentState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, - shader_defs: vec![ - "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), - "DEPTH_CLAMP_ORTHO".into(), - ], - entry_point: "fragment".into(), + shader: MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "resolve_depth".into(), targets: vec![], }), }, ), - copy_material_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { - label: Some("meshlet_copy_material_depth_pipeline".into()), - layout: vec![copy_material_depth_layout], - push_constant_ranges: vec![], - vertex: fullscreen_shader_vertex_state(), - primitive: PrimitiveState::default(), - depth_stencil: Some(DepthStencilState { - format: TextureFormat::Depth16Unorm, - depth_write_enabled: true, - depth_compare: CompareFunction::Always, - stencil: StencilState::default(), - bias: DepthBiasState::default(), - }), - multisample: MultisampleState::default(), - fragment: Some(FragmentState { - shader: MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, + resolve_material_depth: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_resolve_material_depth_pipeline".into()), + layout: vec![resolve_material_depth_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::FRAGMENT, + range: 0..4, + }], + vertex: fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: TextureFormat::Depth16Unorm, + depth_write_enabled: true, + depth_compare: CompareFunction::Always, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: "resolve_material_depth".into(), + targets: vec![], + }), + }, + ), + + remap_1d_to_2d_dispatch: remap_1d_to_2d_dispatch_layout.map(|layout| { + pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_remap_1d_to_2d_dispatch_pipeline".into()), + layout: vec![layout], + push_constant_ranges: vec![], + shader: MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE, shader_defs: vec![], - entry_point: "copy_material_depth".into(), - targets: vec![], - }), + entry_point: "remap_dispatch".into(), + }) }), } } @@ -270,6 +455,11 @@ impl MeshletPipelines { pub fn get( world: &World, ) -> Option<( + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, &ComputePipeline, &ComputePipeline, &ComputePipeline, @@ -279,6 +469,9 @@ impl MeshletPipelines { &RenderPipeline, &RenderPipeline, &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + Option<&ComputePipeline>, )> { let pipeline_cache = world.get_resource::()?; let pipeline = world.get_resource::()?; @@ -288,11 +481,27 @@ impl MeshletPipelines { pipeline_cache.get_compute_pipeline(pipeline.cull_second)?, pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first)?, pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second)?, - pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?, - pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first_shadow_view)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second_shadow_view)?, + pipeline_cache.get_compute_pipeline(pipeline.visibility_buffer_software_raster)?, pipeline_cache - .get_render_pipeline(pipeline.visibility_buffer_raster_depth_only_clamp_ortho)?, - pipeline_cache.get_render_pipeline(pipeline.copy_material_depth)?, + .get_compute_pipeline(pipeline.visibility_buffer_software_raster_depth_only)?, + pipeline_cache.get_compute_pipeline( + pipeline.visibility_buffer_software_raster_depth_only_clamp_ortho, + )?, + pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_hardware_raster)?, + pipeline_cache + .get_render_pipeline(pipeline.visibility_buffer_hardware_raster_depth_only)?, + pipeline_cache.get_render_pipeline( + pipeline.visibility_buffer_hardware_raster_depth_only_clamp_ortho, + )?, + pipeline_cache.get_render_pipeline(pipeline.resolve_depth)?, + pipeline_cache.get_render_pipeline(pipeline.resolve_depth_shadow_view)?, + pipeline_cache.get_render_pipeline(pipeline.resolve_material_depth)?, + match pipeline.remap_1d_to_2d_dispatch { + Some(id) => Some(pipeline_cache.get_compute_pipeline(id)?), + None => None, + }, )) } } diff --git a/crates/bevy_pbr/src/meshlet/remap_1d_to_2d_dispatch.wgsl b/crates/bevy_pbr/src/meshlet/remap_1d_to_2d_dispatch.wgsl new file mode 100644 index 0000000000..6ade11b1d8 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/remap_1d_to_2d_dispatch.wgsl @@ -0,0 +1,20 @@ +/// Remaps an indirect 1d to 2d dispatch for devices with low dispatch size limit. + +struct DispatchIndirectArgs { + x: u32, + y: u32, + z: u32, +} + +@group(0) @binding(0) var meshlet_software_raster_indirect_args: DispatchIndirectArgs; +@group(0) @binding(1) var meshlet_software_raster_cluster_count: u32; + +@compute +@workgroup_size(1, 1, 1) +fn remap_dispatch() { + meshlet_software_raster_cluster_count = meshlet_software_raster_indirect_args.x; + + let n = u32(ceil(sqrt(f32(meshlet_software_raster_indirect_args.x)))); + meshlet_software_raster_indirect_args.x = n; + meshlet_software_raster_indirect_args.y = n; +} diff --git a/crates/bevy_pbr/src/meshlet/resolve_render_targets.wgsl b/crates/bevy_pbr/src/meshlet/resolve_render_targets.wgsl new file mode 100644 index 0000000000..b54dbaec53 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/resolve_render_targets.wgsl @@ -0,0 +1,39 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(0) var meshlet_visibility_buffer: array; // Per pixel +#else +@group(0) @binding(0) var meshlet_visibility_buffer: array; // Per pixel +#endif +@group(0) @binding(1) var meshlet_cluster_instance_ids: array; // Per cluster +@group(0) @binding(2) var meshlet_instance_material_ids: array; // Per entity instance +var view_width: u32; + +/// This pass writes out the depth texture. +@fragment +fn resolve_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { + let frag_coord_1d = u32(in.position.y) * view_width + u32(in.position.x); + let visibility = meshlet_visibility_buffer[frag_coord_1d]; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + return bitcast(u32(visibility >> 32u)); +#else + return bitcast(visibility); +#endif +} + +/// This pass writes out the material depth texture. +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@fragment +fn resolve_material_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { + let frag_coord_1d = u32(in.position.y) * view_width + u32(in.position.x); + let visibility = meshlet_visibility_buffer[frag_coord_1d]; + + let depth = visibility >> 32u; + if depth == 0lu { return 0.0; } + + let cluster_id = u32(visibility) >> 6u; + let instance_id = meshlet_cluster_instance_ids[cluster_id]; + let material_id = meshlet_instance_material_ids[instance_id]; + return f32(material_id) / 65535.0; +} +#endif diff --git a/crates/bevy_pbr/src/meshlet/resource_manager.rs b/crates/bevy_pbr/src/meshlet/resource_manager.rs new file mode 100644 index 0000000000..1d6ada2a2a --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/resource_manager.rs @@ -0,0 +1,809 @@ +use super::{instance_manager::InstanceManager, meshlet_mesh_manager::MeshletMeshManager}; +use crate::ShadowView; +use bevy_core_pipeline::{ + core_3d::Camera3d, + prepass::{PreviousViewData, PreviousViewUniforms}, +}; +use bevy_ecs::{ + component::Component, + entity::{Entity, EntityHashMap}, + query::AnyOf, + system::{Commands, Query, Res, ResMut, Resource}, +}; +use bevy_math::{UVec2, Vec4Swizzles}; +use bevy_render::{ + render_resource::*, + renderer::{RenderDevice, RenderQueue}, + texture::{CachedTexture, TextureCache}, + view::{ExtractedView, RenderLayers, ViewUniform, ViewUniforms}, +}; +use binding_types::*; +use encase::internal::WriteInto; +use std::{ + array, iter, + mem::size_of, + sync::{atomic::AtomicBool, Arc}, +}; + +/// Manages per-view and per-cluster GPU resources for [`super::MeshletPlugin`]. +#[derive(Resource)] +pub struct ResourceManager { + /// Intermediate buffer of cluster IDs for use with rasterizing the visibility buffer + visibility_buffer_raster_clusters: Buffer, + /// Intermediate buffer of count of clusters to software rasterize + software_raster_cluster_count: Buffer, + /// Rightmost slot index of [`Self::visibility_buffer_raster_clusters`] + raster_cluster_rightmost_slot: u32, + + /// Per-cluster instance ID + cluster_instance_ids: Option, + /// Per-cluster meshlet ID + cluster_meshlet_ids: Option, + /// Per-cluster bitmask of whether or not it's a candidate for the second raster pass + second_pass_candidates_buffer: Option, + /// Sampler for a depth pyramid + depth_pyramid_sampler: Sampler, + /// Dummy texture view for binding depth pyramids with less than the maximum amount of mips + depth_pyramid_dummy_texture: TextureView, + + // TODO + previous_depth_pyramids: EntityHashMap, + + // Bind group layouts + pub fill_cluster_buffers_bind_group_layout: BindGroupLayout, + pub culling_bind_group_layout: BindGroupLayout, + pub visibility_buffer_raster_bind_group_layout: BindGroupLayout, + pub downsample_depth_bind_group_layout: BindGroupLayout, + pub resolve_depth_bind_group_layout: BindGroupLayout, + pub resolve_material_depth_bind_group_layout: BindGroupLayout, + pub material_shade_bind_group_layout: BindGroupLayout, + pub remap_1d_to_2d_dispatch_bind_group_layout: Option, +} + +impl ResourceManager { + pub fn new(cluster_buffer_slots: u32, render_device: &RenderDevice) -> Self { + let needs_dispatch_remap = + cluster_buffer_slots < render_device.limits().max_compute_workgroups_per_dimension; + + Self { + visibility_buffer_raster_clusters: render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_visibility_buffer_raster_clusters"), + size: cluster_buffer_slots as u64 * size_of::() as u64, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + software_raster_cluster_count: render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_software_raster_cluster_count"), + size: size_of::() as u64, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + raster_cluster_rightmost_slot: cluster_buffer_slots - 1, + + cluster_instance_ids: None, + cluster_meshlet_ids: None, + second_pass_candidates_buffer: None, + depth_pyramid_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("meshlet_depth_pyramid_sampler"), + ..SamplerDescriptor::default() + }), + depth_pyramid_dummy_texture: render_device + .create_texture(&TextureDescriptor { + label: Some("meshlet_depth_pyramid_dummy_texture"), + size: Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING, + view_formats: &[], + }) + .create_view(&TextureViewDescriptor { + label: Some("meshlet_depth_pyramid_dummy_texture_view"), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }), + + previous_depth_pyramids: EntityHashMap::default(), + + // TODO: Buffer min sizes + fill_cluster_buffers_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_fill_cluster_buffers_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ), + culling_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + ), + ), + ), + downsample_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_downsample_depth_bind_group_layout", + &BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, { + let write_only_r32float = || { + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly) + }; + ( + storage_buffer_read_only_sized(false, None), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + texture_storage_2d( + TextureFormat::R32Float, + StorageTextureAccess::ReadWrite, + ), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + sampler(SamplerBindingType::NonFiltering), + ) + }), + ), + visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_visibility_buffer_raster_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::all(), + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + uniform_buffer::(true), + ), + ), + ), + resolve_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_resolve_depth_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::FRAGMENT, + storage_buffer_read_only_sized(false, None), + ), + ), + resolve_material_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_resolve_material_depth_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + material_shade_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_mesh_material_shade_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + remap_1d_to_2d_dispatch_bind_group_layout: needs_dispatch_remap.then(|| { + render_device.create_bind_group_layout( + "meshlet_remap_1d_to_2d_dispatch_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ) + }), + } + } +} + +// ------------ TODO: Everything under here needs to be rewritten and cached ------------ + +#[derive(Component)] +pub struct MeshletViewResources { + pub scene_cluster_count: u32, + pub second_pass_candidates_buffer: Buffer, + instance_visibility: Buffer, + pub dummy_render_target: CachedTexture, + pub visibility_buffer: Buffer, + pub visibility_buffer_software_raster_indirect_args_first: Buffer, + pub visibility_buffer_software_raster_indirect_args_second: Buffer, + pub visibility_buffer_hardware_raster_indirect_args_first: Buffer, + pub visibility_buffer_hardware_raster_indirect_args_second: Buffer, + depth_pyramid_all_mips: TextureView, + depth_pyramid_mips: [TextureView; 12], + pub depth_pyramid_mip_count: u32, + previous_depth_pyramid: TextureView, + pub material_depth: Option, + pub view_size: UVec2, + pub raster_cluster_rightmost_slot: u32, +} + +#[derive(Component)] +pub struct MeshletViewBindGroups { + pub first_node: Arc, + pub fill_cluster_buffers: BindGroup, + pub culling_first: BindGroup, + pub culling_second: BindGroup, + pub downsample_depth: BindGroup, + pub visibility_buffer_raster: BindGroup, + pub resolve_depth: BindGroup, + pub resolve_material_depth: Option, + pub material_shade: Option, + pub remap_1d_to_2d_dispatch: Option<(BindGroup, BindGroup)>, +} + +// TODO: Try using Queue::write_buffer_with() in queue_meshlet_mesh_upload() to reduce copies +fn upload_storage_buffer( + buffer: &mut StorageBuffer>, + render_device: &RenderDevice, + render_queue: &RenderQueue, +) where + Vec: WriteInto, +{ + let inner = buffer.buffer(); + let capacity = inner.map_or(0, |b| b.size()); + let size = buffer.get().size().get() as BufferAddress; + + if capacity >= size { + let inner = inner.unwrap(); + let bytes = bytemuck::must_cast_slice(buffer.get().as_slice()); + render_queue.write_buffer(inner, 0, bytes); + } else { + buffer.write_buffer(render_device, render_queue); + } +} + +// TODO: Cache things per-view and skip running this system / optimize this system +pub fn prepare_meshlet_per_frame_resources( + mut resource_manager: ResMut, + mut instance_manager: ResMut, + views: Query<( + Entity, + &ExtractedView, + Option<&RenderLayers>, + AnyOf<(&Camera3d, &ShadowView)>, + )>, + mut texture_cache: ResMut, + render_queue: Res, + render_device: Res, + mut commands: Commands, +) { + if instance_manager.scene_cluster_count == 0 { + return; + } + + let instance_manager = instance_manager.as_mut(); + + // TODO: Move this and the submit to a separate system and remove pub from the fields + instance_manager + .instance_uniforms + .write_buffer(&render_device, &render_queue); + upload_storage_buffer( + &mut instance_manager.instance_material_ids, + &render_device, + &render_queue, + ); + upload_storage_buffer( + &mut instance_manager.instance_meshlet_counts_prefix_sum, + &render_device, + &render_queue, + ); + upload_storage_buffer( + &mut instance_manager.instance_meshlet_slice_starts, + &render_device, + &render_queue, + ); + + // Early submission for GPU data uploads to start while the render graph records commands + render_queue.submit([]); + + let needed_buffer_size = 4 * instance_manager.scene_cluster_count as u64; + match &mut resource_manager.cluster_instance_ids { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_cluster_instance_ids"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + match &mut resource_manager.cluster_meshlet_ids { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_cluster_meshlet_ids"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + + let needed_buffer_size = + instance_manager.scene_cluster_count.div_ceil(u32::BITS) as u64 * size_of::() as u64; + for (view_entity, view, render_layers, (_, shadow_view)) in &views { + let not_shadow_view = shadow_view.is_none(); + + let instance_visibility = instance_manager + .view_instance_visibility + .entry(view_entity) + .or_insert_with(|| { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_view_instance_visibility")); + buffer + }); + for (instance_index, (_, layers, not_shadow_caster)) in + instance_manager.instances.iter().enumerate() + { + // If either the layers don't match the view's layers or this is a shadow view + // and the instance is not a shadow caster, hide the instance for this view + if !render_layers + .unwrap_or(&RenderLayers::default()) + .intersects(layers) + || (shadow_view.is_some() && *not_shadow_caster) + { + let vec = instance_visibility.get_mut(); + let index = instance_index / 32; + let bit = instance_index - index * 32; + if vec.len() <= index { + vec.extend(iter::repeat(0).take(index - vec.len() + 1)); + } + vec[index] |= 1 << bit; + } + } + upload_storage_buffer(instance_visibility, &render_device, &render_queue); + let instance_visibility = instance_visibility.buffer().unwrap().clone(); + + let second_pass_candidates_buffer = + match &mut resource_manager.second_pass_candidates_buffer { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_second_pass_candidates"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + + // TODO: Remove this once wgpu allows render passes with no attachments + let dummy_render_target = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("meshlet_dummy_render_target"), + size: Extent3d { + width: view.viewport.z, + height: view.viewport.w, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R8Uint, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }, + ); + + let type_size = if not_shadow_view { + size_of::() + } else { + size_of::() + } as u64; + // TODO: Cache + let visibility_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_visibility_buffer"), + size: type_size * (view.viewport.z * view.viewport.w) as u64, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let visibility_buffer_software_raster_indirect_args_first = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_software_raster_indirect_args_first"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + let visibility_buffer_software_raster_indirect_args_second = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("visibility_buffer_software_raster_indirect_args_second"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let visibility_buffer_hardware_raster_indirect_args_first = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_hardware_raster_indirect_args_first"), + contents: DrawIndirectArgs { + vertex_count: 64 * 3, + instance_count: 0, + first_vertex: 0, + first_instance: 0, + } + .as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + let visibility_buffer_hardware_raster_indirect_args_second = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("visibility_buffer_hardware_raster_indirect_args_second"), + contents: DrawIndirectArgs { + vertex_count: 64 * 3, + instance_count: 0, + first_vertex: 0, + first_instance: 0, + } + .as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let depth_pyramid_size = Extent3d { + width: view.viewport.z.div_ceil(2), + height: view.viewport.w.div_ceil(2), + depth_or_array_layers: 1, + }; + let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); + let depth_pyramid = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("meshlet_depth_pyramid"), + size: depth_pyramid_size, + mip_level_count: depth_pyramid_mip_count, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + let depth_pyramid_mips = array::from_fn(|i| { + if (i as u32) < depth_pyramid_mip_count { + depth_pyramid.texture.create_view(&TextureViewDescriptor { + label: Some("meshlet_depth_pyramid_texture_view"), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: i as u32, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }) + } else { + resource_manager.depth_pyramid_dummy_texture.clone() + } + }); + let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); + + let previous_depth_pyramid = + match resource_manager.previous_depth_pyramids.get(&view_entity) { + Some(texture_view) => texture_view.clone(), + None => depth_pyramid_all_mips.clone(), + }; + resource_manager + .previous_depth_pyramids + .insert(view_entity, depth_pyramid_all_mips.clone()); + + let material_depth = TextureDescriptor { + label: Some("meshlet_material_depth"), + size: Extent3d { + width: view.viewport.z, + height: view.viewport.w, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::Depth16Unorm, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }; + + commands.entity(view_entity).insert(MeshletViewResources { + scene_cluster_count: instance_manager.scene_cluster_count, + second_pass_candidates_buffer, + instance_visibility, + dummy_render_target, + visibility_buffer, + visibility_buffer_software_raster_indirect_args_first, + visibility_buffer_software_raster_indirect_args_second, + visibility_buffer_hardware_raster_indirect_args_first, + visibility_buffer_hardware_raster_indirect_args_second, + depth_pyramid_all_mips, + depth_pyramid_mips, + depth_pyramid_mip_count, + previous_depth_pyramid, + material_depth: not_shadow_view + .then(|| texture_cache.get(&render_device, material_depth)), + view_size: view.viewport.zw(), + raster_cluster_rightmost_slot: resource_manager.raster_cluster_rightmost_slot, + }); + } +} + +#[allow(clippy::too_many_arguments)] +pub fn prepare_meshlet_view_bind_groups( + meshlet_mesh_manager: Res, + resource_manager: Res, + instance_manager: Res, + views: Query<(Entity, &MeshletViewResources)>, + view_uniforms: Res, + previous_view_uniforms: Res, + render_device: Res, + mut commands: Commands, +) { + let ( + Some(cluster_instance_ids), + Some(cluster_meshlet_ids), + Some(view_uniforms), + Some(previous_view_uniforms), + ) = ( + resource_manager.cluster_instance_ids.as_ref(), + resource_manager.cluster_meshlet_ids.as_ref(), + view_uniforms.uniforms.binding(), + previous_view_uniforms.uniforms.binding(), + ) + else { + return; + }; + + let first_node = Arc::new(AtomicBool::new(true)); + + // TODO: Some of these bind groups can be reused across multiple views + for (view_entity, view_resources) in &views { + let entries = BindGroupEntries::sequential(( + instance_manager + .instance_meshlet_counts_prefix_sum + .binding() + .unwrap(), + instance_manager + .instance_meshlet_slice_starts + .binding() + .unwrap(), + cluster_instance_ids.as_entire_binding(), + cluster_meshlet_ids.as_entire_binding(), + )); + let fill_cluster_buffers = render_device.create_bind_group( + "meshlet_fill_cluster_buffers", + &resource_manager.fill_cluster_buffers_bind_group_layout, + &entries, + ); + + let entries = BindGroupEntries::sequential(( + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlet_bounding_spheres.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), + view_resources + .visibility_buffer_software_raster_indirect_args_first + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args_first + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + )); + let culling_first = render_device.create_bind_group( + "meshlet_culling_first_bind_group", + &resource_manager.culling_bind_group_layout, + &entries, + ); + + let entries = BindGroupEntries::sequential(( + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlet_bounding_spheres.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), + view_resources + .visibility_buffer_software_raster_indirect_args_second + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args_second + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + &view_resources.depth_pyramid_all_mips, + view_uniforms.clone(), + previous_view_uniforms.clone(), + )); + let culling_second = render_device.create_bind_group( + "meshlet_culling_second_bind_group", + &resource_manager.culling_bind_group_layout, + &entries, + ); + + let downsample_depth = render_device.create_bind_group( + "meshlet_downsample_depth_bind_group", + &resource_manager.downsample_depth_bind_group_layout, + &BindGroupEntries::sequential(( + view_resources.visibility_buffer.as_entire_binding(), + &view_resources.depth_pyramid_mips[0], + &view_resources.depth_pyramid_mips[1], + &view_resources.depth_pyramid_mips[2], + &view_resources.depth_pyramid_mips[3], + &view_resources.depth_pyramid_mips[4], + &view_resources.depth_pyramid_mips[5], + &view_resources.depth_pyramid_mips[6], + &view_resources.depth_pyramid_mips[7], + &view_resources.depth_pyramid_mips[8], + &view_resources.depth_pyramid_mips[9], + &view_resources.depth_pyramid_mips[10], + &view_resources.depth_pyramid_mips[11], + &resource_manager.depth_pyramid_sampler, + )), + ); + + let entries = BindGroupEntries::sequential(( + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlets.binding(), + meshlet_mesh_manager.indices.binding(), + meshlet_mesh_manager.vertex_ids.binding(), + meshlet_mesh_manager.vertex_data.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + view_resources.visibility_buffer.as_entire_binding(), + view_uniforms.clone(), + )); + let visibility_buffer_raster = render_device.create_bind_group( + "meshlet_visibility_raster_buffer_bind_group", + &resource_manager.visibility_buffer_raster_bind_group_layout, + &entries, + ); + + let resolve_depth = render_device.create_bind_group( + "meshlet_resolve_depth_bind_group", + &resource_manager.resolve_depth_bind_group_layout, + &BindGroupEntries::single(view_resources.visibility_buffer.as_entire_binding()), + ); + + let resolve_material_depth = view_resources.material_depth.as_ref().map(|_| { + let entries = BindGroupEntries::sequential(( + view_resources.visibility_buffer.as_entire_binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_material_ids.binding().unwrap(), + )); + render_device.create_bind_group( + "meshlet_resolve_material_depth_bind_group", + &resource_manager.resolve_material_depth_bind_group_layout, + &entries, + ) + }); + + let material_shade = view_resources.material_depth.as_ref().map(|_| { + let entries = BindGroupEntries::sequential(( + view_resources.visibility_buffer.as_entire_binding(), + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlets.binding(), + meshlet_mesh_manager.indices.binding(), + meshlet_mesh_manager.vertex_ids.binding(), + meshlet_mesh_manager.vertex_data.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + )); + render_device.create_bind_group( + "meshlet_mesh_material_shade_bind_group", + &resource_manager.material_shade_bind_group_layout, + &entries, + ) + }); + + let remap_1d_to_2d_dispatch = resource_manager + .remap_1d_to_2d_dispatch_bind_group_layout + .as_ref() + .map(|layout| { + ( + render_device.create_bind_group( + "meshlet_remap_1d_to_2d_dispatch_first_bind_group", + layout, + &BindGroupEntries::sequential(( + view_resources + .visibility_buffer_software_raster_indirect_args_first + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + )), + ), + render_device.create_bind_group( + "meshlet_remap_1d_to_2d_dispatch_second_bind_group", + layout, + &BindGroupEntries::sequential(( + view_resources + .visibility_buffer_software_raster_indirect_args_second + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + )), + ), + ) + }); + + commands.entity(view_entity).insert(MeshletViewBindGroups { + first_node: Arc::clone(&first_node), + fill_cluster_buffers, + culling_first, + culling_second, + downsample_depth, + visibility_buffer_raster, + resolve_depth, + resolve_material_depth, + material_shade, + remap_1d_to_2d_dispatch, + }); + } +} diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_hardware_raster.wgsl similarity index 56% rename from crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl rename to crates/bevy_pbr/src/meshlet/visibility_buffer_hardware_raster.wgsl index 86b34cd2f0..e3cf7a6fb3 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_hardware_raster.wgsl @@ -6,8 +6,8 @@ meshlet_vertex_data, meshlet_cluster_instance_ids, meshlet_instance_uniforms, - meshlet_instance_material_ids, - draw_triangle_buffer, + meshlet_raster_clusters, + meshlet_visibility_buffer, view, get_meshlet_index, unpack_meshlet_vertex, @@ -15,38 +15,33 @@ mesh_functions::mesh_position_local_to_world, } #import bevy_render::maths::affine3_to_square +var meshlet_raster_cluster_rightmost_slot: u32; -/// Vertex/fragment shader for rasterizing meshlets into a visibility buffer. +/// Vertex/fragment shader for rasterizing large clusters into a visibility buffer. struct VertexOutput { - @builtin(position) clip_position: vec4, + @builtin(position) position: vec4, #ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT - @location(0) @interpolate(flat) visibility: u32, - @location(1) @interpolate(flat) material_depth: u32, + @location(0) @interpolate(flat) packed_ids: u32, #endif #ifdef DEPTH_CLAMP_ORTHO @location(0) unclamped_clip_depth: f32, #endif } -#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT -struct FragmentOutput { - @location(0) visibility: vec4, - @location(1) material_depth: vec4, -} -#endif - @vertex -fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { - let packed_ids = draw_triangle_buffer[vertex_index / 3u]; - let cluster_id = packed_ids >> 6u; - let triangle_id = extractBits(packed_ids, 0u, 6u); - let index_id = (triangle_id * 3u) + (vertex_index % 3u); +fn vertex(@builtin(instance_index) instance_index: u32, @builtin(vertex_index) vertex_index: u32) -> VertexOutput { + let cluster_id = meshlet_raster_clusters[meshlet_raster_cluster_rightmost_slot - instance_index]; let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; + + let triangle_id = vertex_index / 3u; + if triangle_id >= meshlet.triangle_count { return dummy_vertex(); } + let index_id = (triangle_id * 3u) + (vertex_index % 3u); let index = get_meshlet_index(meshlet.start_index_id + index_id); let vertex_id = meshlet_vertex_ids[meshlet.start_vertex_id + index]; let vertex = unpack_meshlet_vertex(meshlet_vertex_data[vertex_id]); + let instance_id = meshlet_cluster_instance_ids[cluster_id]; let instance_uniform = meshlet_instance_uniforms[instance_id]; @@ -61,8 +56,7 @@ fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { return VertexOutput( clip_position, #ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT - packed_ids, - meshlet_instance_material_ids[instance_id], + (cluster_id << 6u) | triangle_id, #endif #ifdef DEPTH_CLAMP_ORTHO unclamped_clip_depth, @@ -70,19 +64,31 @@ fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { ); } -#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT @fragment -fn fragment(vertex_output: VertexOutput) -> FragmentOutput { - return FragmentOutput( - vec4(vertex_output.visibility, 0u, 0u, 0u), - vec4(vertex_output.material_depth, 0u, 0u, 0u), +fn fragment(vertex_output: VertexOutput) { + let frag_coord_1d = u32(vertex_output.position.y) * u32(view.viewport.z) + u32(vertex_output.position.x); + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let depth = bitcast(vertex_output.position.z); + let visibility = (u64(depth) << 32u) | u64(vertex_output.packed_ids); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], visibility); +#else ifdef DEPTH_CLAMP_ORTHO + let depth = bitcast(vertex_output.unclamped_clip_depth); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); +#else + let depth = bitcast(vertex_output.position.z); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); +#endif +} + +fn dummy_vertex() -> VertexOutput { + return VertexOutput( + vec4(0.0), +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + 0u, +#endif +#ifdef DEPTH_CLAMP_ORTHO + 0.0, +#endif ); } -#endif - -#ifdef DEPTH_CLAMP_ORTHO -@fragment -fn fragment(vertex_output: VertexOutput) -> @builtin(frag_depth) f32 { - return vertex_output.unclamped_clip_depth; -} -#endif diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs index 5af10769cc..3f2b8883e7 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs @@ -1,6 +1,6 @@ use super::{ - gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, pipelines::MeshletPipelines, + resource_manager::{MeshletViewBindGroups, MeshletViewResources}, }; use crate::{LightEntity, ShadowView, ViewLightEntities}; use bevy_color::LinearRgba; @@ -80,10 +80,18 @@ impl Node for MeshletVisibilityBufferRasterPassNode { culling_second_pipeline, downsample_depth_first_pipeline, downsample_depth_second_pipeline, - visibility_buffer_raster_pipeline, - visibility_buffer_raster_depth_only_pipeline, - visibility_buffer_raster_depth_only_clamp_ortho, - copy_material_depth_pipeline, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, + visibility_buffer_software_raster_pipeline, + visibility_buffer_software_raster_depth_only_pipeline, + visibility_buffer_software_raster_depth_only_clamp_ortho, + visibility_buffer_hardware_raster_pipeline, + visibility_buffer_hardware_raster_depth_only_pipeline, + visibility_buffer_hardware_raster_depth_only_clamp_ortho, + resolve_depth_pipeline, + resolve_depth_shadow_view_pipeline, + resolve_material_depth_pipeline, + remap_1d_to_2d_dispatch_pipeline, )) = MeshletPipelines::get(world) else { return Ok(()); @@ -94,7 +102,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { .fetch_and(false, Ordering::SeqCst); let thread_per_cluster_workgroups = - (meshlet_view_resources.scene_meshlet_count.div_ceil(128) as f32) + (meshlet_view_resources.scene_cluster_count.div_ceil(128) as f32) .cbrt() .ceil() as u32; @@ -112,7 +120,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { &meshlet_view_bind_groups.fill_cluster_buffers, fill_cluster_buffers_pipeline, thread_per_cluster_workgroups, - meshlet_view_resources.scene_meshlet_count, + meshlet_view_resources.scene_cluster_count, ); } cull_pass( @@ -123,17 +131,25 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_first_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(bg1, _)| bg1), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( true, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_first, - view_depth.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_first, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_first, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - visibility_buffer_raster_pipeline, + visibility_buffer_software_raster_pipeline, + visibility_buffer_hardware_raster_pipeline, Some(camera), + meshlet_view_resources.raster_cluster_rightmost_slot, ); downsample_depth( render_context, @@ -150,23 +166,39 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_second_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(_, bg2)| bg2), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( false, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_second, - view_depth.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_second, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_second, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - visibility_buffer_raster_pipeline, + visibility_buffer_software_raster_pipeline, + visibility_buffer_hardware_raster_pipeline, Some(camera), + meshlet_view_resources.raster_cluster_rightmost_slot, ); - copy_material_depth_pass( + resolve_depth( + render_context, + view_depth.get_attachment(StoreOp::Store), + meshlet_view_resources, + meshlet_view_bind_groups, + resolve_depth_pipeline, + camera, + ); + resolve_material_depth( render_context, meshlet_view_resources, meshlet_view_bind_groups, - copy_material_depth_pipeline, + resolve_material_depth_pipeline, camera, ); downsample_depth( @@ -191,9 +223,18 @@ impl Node for MeshletVisibilityBufferRasterPassNode { continue; }; - let shadow_visibility_buffer_pipeline = match light_type { - LightEntity::Directional { .. } => visibility_buffer_raster_depth_only_clamp_ortho, - _ => visibility_buffer_raster_depth_only_pipeline, + let ( + shadow_visibility_buffer_software_raster_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, + ) = match light_type { + LightEntity::Directional { .. } => ( + visibility_buffer_software_raster_depth_only_clamp_ortho, + visibility_buffer_hardware_raster_depth_only_clamp_ortho, + ), + _ => ( + visibility_buffer_software_raster_depth_only_pipeline, + visibility_buffer_hardware_raster_depth_only_pipeline, + ), }; render_context.command_encoder().push_debug_group(&format!( @@ -213,24 +254,32 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_first_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(bg1, _)| bg1), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( true, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_first, - shadow_view.depth_attachment.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_first, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_first, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - shadow_visibility_buffer_pipeline, + shadow_visibility_buffer_software_raster_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, None, + meshlet_view_resources.raster_cluster_rightmost_slot, ); downsample_depth( render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_first_pipeline, - downsample_depth_second_pipeline, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, ); cull_pass( "culling_second", @@ -240,24 +289,40 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_second_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(_, bg2)| bg2), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( false, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_second, - shadow_view.depth_attachment.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_second, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_second, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - shadow_visibility_buffer_pipeline, + shadow_visibility_buffer_software_raster_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, None, + meshlet_view_resources.raster_cluster_rightmost_slot, + ); + resolve_depth( + render_context, + shadow_view.depth_attachment.get_attachment(StoreOp::Store), + meshlet_view_resources, + meshlet_view_bind_groups, + resolve_depth_shadow_view_pipeline, + camera, ); downsample_depth( render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_first_pipeline, - downsample_depth_second_pipeline, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, ); render_context.command_encoder().pop_debug_group(); } @@ -274,20 +339,21 @@ fn fill_cluster_buffers_pass( cluster_count: u32, ) { let command_encoder = render_context.command_encoder(); - let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + let mut fill_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { label: Some("fill_cluster_buffers"), timestamp_writes: None, }); - cull_pass.set_pipeline(fill_cluster_buffers_pass_pipeline); - cull_pass.set_push_constants(0, &cluster_count.to_le_bytes()); - cull_pass.set_bind_group(0, fill_cluster_buffers_bind_group, &[]); - cull_pass.dispatch_workgroups( + fill_pass.set_pipeline(fill_cluster_buffers_pass_pipeline); + fill_pass.set_push_constants(0, &cluster_count.to_le_bytes()); + fill_pass.set_bind_group(0, fill_cluster_buffers_bind_group, &[]); + fill_pass.dispatch_workgroups( fill_cluster_buffers_pass_workgroups, fill_cluster_buffers_pass_workgroups, fill_cluster_buffers_pass_workgroups, ); } +#[allow(clippy::too_many_arguments)] fn cull_pass( label: &'static str, render_context: &mut RenderContext, @@ -296,6 +362,9 @@ fn cull_pass( previous_view_offset: &PreviousViewUniformOffset, culling_pipeline: &ComputePipeline, culling_workgroups: u32, + raster_cluster_rightmost_slot: u32, + remap_1d_to_2d_dispatch_bind_group: Option<&BindGroup>, + remap_1d_to_2d_dispatch_pipeline: Option<&ComputePipeline>, ) { let command_encoder = render_context.command_encoder(); let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { @@ -303,82 +372,90 @@ fn cull_pass( timestamp_writes: None, }); cull_pass.set_pipeline(culling_pipeline); + cull_pass.set_push_constants(0, &raster_cluster_rightmost_slot.to_le_bytes()); cull_pass.set_bind_group( 0, culling_bind_group, &[view_offset.offset, previous_view_offset.offset], ); cull_pass.dispatch_workgroups(culling_workgroups, culling_workgroups, culling_workgroups); + + if let (Some(remap_1d_to_2d_dispatch_pipeline), Some(remap_1d_to_2d_dispatch_bind_group)) = ( + remap_1d_to_2d_dispatch_pipeline, + remap_1d_to_2d_dispatch_bind_group, + ) { + cull_pass.set_pipeline(remap_1d_to_2d_dispatch_pipeline); + cull_pass.set_bind_group(0, remap_1d_to_2d_dispatch_bind_group, &[]); + cull_pass.dispatch_workgroups(1, 1, 1); + } } #[allow(clippy::too_many_arguments)] fn raster_pass( first_pass: bool, render_context: &mut RenderContext, - meshlet_view_resources: &MeshletViewResources, - visibility_buffer_draw_indirect_args: &Buffer, - depth_stencil_attachment: RenderPassDepthStencilAttachment, + visibility_buffer_hardware_software_indirect_args: &Buffer, + visibility_buffer_hardware_raster_indirect_args: &Buffer, + dummy_render_target: &TextureView, meshlet_view_bind_groups: &MeshletViewBindGroups, view_offset: &ViewUniformOffset, - visibility_buffer_raster_pipeline: &RenderPipeline, + visibility_buffer_hardware_software_pipeline: &ComputePipeline, + visibility_buffer_hardware_raster_pipeline: &RenderPipeline, camera: Option<&ExtractedCamera>, + raster_cluster_rightmost_slot: u32, ) { - let mut color_attachments_filled = [None, None]; - if let (Some(visibility_buffer), Some(material_depth_color)) = ( - meshlet_view_resources.visibility_buffer.as_ref(), - meshlet_view_resources.material_depth_color.as_ref(), - ) { - let load = if first_pass { - LoadOp::Clear(LinearRgba::BLACK.into()) - } else { - LoadOp::Load - }; - color_attachments_filled = [ - Some(RenderPassColorAttachment { - view: &visibility_buffer.default_view, - resolve_target: None, - ops: Operations { - load, - store: StoreOp::Store, - }, - }), - Some(RenderPassColorAttachment { - view: &material_depth_color.default_view, - resolve_target: None, - ops: Operations { - load, - store: StoreOp::Store, - }, - }), - ]; - } - - let mut draw_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + let command_encoder = render_context.command_encoder(); + let mut software_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { label: Some(if first_pass { - "raster_first" + "raster_software_first" } else { - "raster_second" + "raster_software_second" }), - color_attachments: if color_attachments_filled[0].is_none() { - &[] - } else { - &color_attachments_filled - }, - depth_stencil_attachment: Some(depth_stencil_attachment), timestamp_writes: None, - occlusion_query_set: None, }); - if let Some(viewport) = camera.and_then(|camera| camera.viewport.as_ref()) { - draw_pass.set_camera_viewport(viewport); - } - - draw_pass.set_render_pipeline(visibility_buffer_raster_pipeline); - draw_pass.set_bind_group( + software_pass.set_pipeline(visibility_buffer_hardware_software_pipeline); + software_pass.set_bind_group( 0, &meshlet_view_bind_groups.visibility_buffer_raster, &[view_offset.offset], ); - draw_pass.draw_indirect(visibility_buffer_draw_indirect_args, 0); + software_pass + .dispatch_workgroups_indirect(visibility_buffer_hardware_software_indirect_args, 0); + drop(software_pass); + + let mut hardware_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some(if first_pass { + "raster_hardware_first" + } else { + "raster_hardware_second" + }), + color_attachments: &[Some(RenderPassColorAttachment { + view: dummy_render_target, + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(LinearRgba::BLACK.into()), + store: StoreOp::Discard, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = camera.and_then(|camera| camera.viewport.as_ref()) { + hardware_pass.set_camera_viewport(viewport); + } + hardware_pass.set_render_pipeline(visibility_buffer_hardware_raster_pipeline); + hardware_pass.set_push_constants( + ShaderStages::VERTEX, + 0, + &raster_cluster_rightmost_slot.to_le_bytes(), + ); + hardware_pass.set_bind_group( + 0, + &meshlet_view_bind_groups.visibility_buffer_raster, + &[view_offset.offset], + ); + hardware_pass.draw_indirect(visibility_buffer_hardware_raster_indirect_args, 0); } fn downsample_depth( @@ -396,7 +473,10 @@ fn downsample_depth( downsample_pass.set_pipeline(downsample_depth_first_pipeline); downsample_pass.set_push_constants( 0, - &meshlet_view_resources.depth_pyramid_mip_count.to_le_bytes(), + bytemuck::cast_slice(&[ + meshlet_view_resources.depth_pyramid_mip_count, + meshlet_view_resources.view_size.x, + ]), ); downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth, &[]); downsample_pass.dispatch_workgroups( @@ -411,19 +491,47 @@ fn downsample_depth( } } -fn copy_material_depth_pass( +fn resolve_depth( + render_context: &mut RenderContext, + depth_stencil_attachment: RenderPassDepthStencilAttachment, + meshlet_view_resources: &MeshletViewResources, + meshlet_view_bind_groups: &MeshletViewBindGroups, + resolve_depth_pipeline: &RenderPipeline, + camera: &ExtractedCamera, +) { + let mut resolve_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("resolve_depth"), + color_attachments: &[], + depth_stencil_attachment: Some(depth_stencil_attachment), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = &camera.viewport { + resolve_pass.set_camera_viewport(viewport); + } + resolve_pass.set_render_pipeline(resolve_depth_pipeline); + resolve_pass.set_push_constants( + ShaderStages::FRAGMENT, + 0, + &meshlet_view_resources.view_size.x.to_le_bytes(), + ); + resolve_pass.set_bind_group(0, &meshlet_view_bind_groups.resolve_depth, &[]); + resolve_pass.draw(0..3, 0..1); +} + +fn resolve_material_depth( render_context: &mut RenderContext, meshlet_view_resources: &MeshletViewResources, meshlet_view_bind_groups: &MeshletViewBindGroups, - copy_material_depth_pipeline: &RenderPipeline, + resolve_material_depth_pipeline: &RenderPipeline, camera: &ExtractedCamera, ) { - if let (Some(material_depth), Some(copy_material_depth_bind_group)) = ( + if let (Some(material_depth), Some(resolve_material_depth_bind_group)) = ( meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.copy_material_depth.as_ref(), + meshlet_view_bind_groups.resolve_material_depth.as_ref(), ) { - let mut copy_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { - label: Some("copy_material_depth"), + let mut resolve_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("resolve_material_depth"), color_attachments: &[], depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { view: &material_depth.default_view, @@ -437,11 +545,15 @@ fn copy_material_depth_pass( occlusion_query_set: None, }); if let Some(viewport) = &camera.viewport { - copy_pass.set_camera_viewport(viewport); + resolve_pass.set_camera_viewport(viewport); } - - copy_pass.set_render_pipeline(copy_material_depth_pipeline); - copy_pass.set_bind_group(0, copy_material_depth_bind_group, &[]); - copy_pass.draw(0..3, 0..1); + resolve_pass.set_render_pipeline(resolve_material_depth_pipeline); + resolve_pass.set_push_constants( + ShaderStages::FRAGMENT, + 0, + &meshlet_view_resources.view_size.x.to_le_bytes(), + ); + resolve_pass.set_bind_group(0, resolve_material_depth_bind_group, &[]); + resolve_pass.draw(0..3, 0..1); } } diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl index baf72afcc4..bb35c16497 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl @@ -95,13 +95,14 @@ struct VertexOutput { /// Load the visibility buffer texture and resolve it into a VertexOutput. fn resolve_vertex_output(frag_coord: vec4) -> VertexOutput { - let packed_ids = textureLoad(meshlet_visibility_buffer, vec2(frag_coord.xy), 0).r; + let frag_coord_1d = u32(frag_coord.y) * u32(view.viewport.z) + u32(frag_coord.x); + let packed_ids = u32(meshlet_visibility_buffer[frag_coord_1d]); // TODO: Might be faster to load the correct u32 directly let cluster_id = packed_ids >> 6u; let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; let triangle_id = extractBits(packed_ids, 0u, 6u); - let index_ids = meshlet.start_index_id + vec3(triangle_id * 3u) + vec3(0u, 1u, 2u); + let index_ids = meshlet.start_index_id + (triangle_id * 3u) + vec3(0u, 1u, 2u); let indices = meshlet.start_vertex_id + vec3(get_meshlet_index(index_ids.x), get_meshlet_index(index_ids.y), get_meshlet_index(index_ids.z)); let vertex_ids = vec3(meshlet_vertex_ids[indices.x], meshlet_vertex_ids[indices.y], meshlet_vertex_ids[indices.z]); let vertex_1 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.x]); diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl new file mode 100644 index 0000000000..02feaaeaed --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl @@ -0,0 +1,196 @@ +#import bevy_pbr::{ + meshlet_bindings::{ + meshlet_cluster_meshlet_ids, + meshlets, + meshlet_vertex_ids, + meshlet_vertex_data, + meshlet_cluster_instance_ids, + meshlet_instance_uniforms, + meshlet_raster_clusters, + meshlet_software_raster_cluster_count, + meshlet_visibility_buffer, + view, + get_meshlet_index, + unpack_meshlet_vertex, + }, + mesh_functions::mesh_position_local_to_world, + view_transformations::ndc_to_uv, +} +#import bevy_render::maths::affine3_to_square + +/// Compute shader for rasterizing small clusters into a visibility buffer. + +// TODO: Subpixel precision and top-left rule + +var viewport_vertices: array; + +@compute +@workgroup_size(64, 1, 1) // 64 threads per workgroup, 1 vertex/triangle per thread, 1 cluster per workgroup +fn rasterize_cluster( + @builtin(workgroup_id) workgroup_id: vec3, + @builtin(local_invocation_index) local_invocation_index: u32, +#ifdef MESHLET_2D_DISPATCH + @builtin(num_workgroups) num_workgroups: vec3, +#endif +) { + var workgroup_id_1d = workgroup_id.x; + +#ifdef MESHLET_2D_DISPATCH + workgroup_id_1d += workgroup_id.y * num_workgroups.x; + if workgroup_id_1d >= meshlet_software_raster_cluster_count { return; } +#endif + + let cluster_id = meshlet_raster_clusters[workgroup_id_1d]; + let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id]; + let meshlet = meshlets[meshlet_id]; + + // Load and project 1 vertex per thread + let vertex_id = local_invocation_index; + if vertex_id < meshlet.vertex_count { + let meshlet_vertex_id = meshlet_vertex_ids[meshlet.start_vertex_id + vertex_id]; + let vertex = unpack_meshlet_vertex(meshlet_vertex_data[meshlet_vertex_id]); + + // Project vertex to viewport space + let instance_id = meshlet_cluster_instance_ids[cluster_id]; + let instance_uniform = meshlet_instance_uniforms[instance_id]; + let world_from_local = affine3_to_square(instance_uniform.world_from_local); + let world_position = mesh_position_local_to_world(world_from_local, vec4(vertex.position, 1.0)); + var clip_position = view.clip_from_world * vec4(world_position.xyz, 1.0); + var ndc_position = clip_position.xyz / clip_position.w; +#ifdef DEPTH_CLAMP_ORTHO + ndc_position.z = 1.0 / clip_position.z; +#endif + let viewport_position_xy = ndc_to_uv(ndc_position.xy) * view.viewport.zw; + + // Write vertex to workgroup shared memory + viewport_vertices[vertex_id] = vec3(viewport_position_xy, ndc_position.z); + } + + workgroupBarrier(); + + // Load 1 triangle's worth of vertex data per thread + let triangle_id = local_invocation_index; + if triangle_id >= meshlet.triangle_count { return; } + let index_ids = meshlet.start_index_id + (triangle_id * 3u) + vec3(0u, 1u, 2u); + let vertex_ids = vec3(get_meshlet_index(index_ids[0]), get_meshlet_index(index_ids[1]), get_meshlet_index(index_ids[2])); + let vertex_0 = viewport_vertices[vertex_ids[2]]; + let vertex_1 = viewport_vertices[vertex_ids[1]]; + let vertex_2 = viewport_vertices[vertex_ids[0]]; + let packed_ids = (cluster_id << 6u) | triangle_id; + + // Compute triangle bounding box + let min_x = u32(min3(vertex_0.x, vertex_1.x, vertex_2.x)); + let min_y = u32(min3(vertex_0.y, vertex_1.y, vertex_2.y)); + var max_x = u32(ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x))); + var max_y = u32(ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y))); + max_x = min(max_x, u32(view.viewport.z) - 1u); + max_y = min(max_y, u32(view.viewport.w) - 1u); + if any(vec2(min_x, min_y) > vec2(max_x, max_y)) { return; } + + // Setup triangle gradients + let w_x = vec3(vertex_1.y - vertex_2.y, vertex_2.y - vertex_0.y, vertex_0.y - vertex_1.y); + let w_y = vec3(vertex_2.x - vertex_1.x, vertex_0.x - vertex_2.x, vertex_1.x - vertex_0.x); + let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy); // TODO: Reuse earlier calculations and take advantage of summing to 1 + let vertices_z = vec3(vertex_0.z, vertex_1.z, vertex_2.z) / triangle_double_area; + let z_x = dot(vertices_z, w_x); + let z_y = dot(vertices_z, w_y); + + // Setup initial triangle equations + let starting_pixel = vec2(f32(min_x), f32(min_y)) + 0.5; + var w_row = vec3( + // TODO: Reuse earlier calculations and take advantage of summing to 1 + edge_function(vertex_1.xy, vertex_2.xy, starting_pixel), + edge_function(vertex_2.xy, vertex_0.xy, starting_pixel), + edge_function(vertex_0.xy, vertex_1.xy, starting_pixel), + ); + var z_row = dot(vertices_z, w_row); + let view_width = u32(view.viewport.z); + var frag_coord_1d_row = min_y * view_width; + + // Rasterize triangle + if subgroupAny(max_x - min_x > 4u) { + // Scanline setup + let edge_012 = -w_x; + let open_edge = edge_012 < vec3(0.0); + let inverse_edge_012 = select(1.0 / edge_012, vec3(1e8), edge_012 == vec3(0.0)); + let max_x_diff = vec3(max_x - min_x); + for (var y = min_y; y <= max_y; y++) { + // Calculate start and end X interval for pixels in this row within the triangle + let cross_x = w_row * inverse_edge_012; + let min_x2 = select(vec3(0.0), cross_x, open_edge); + let max_x2 = select(cross_x, max_x_diff, open_edge); + var x0 = u32(ceil(max3(min_x2[0], min_x2[1], min_x2[2]))); + var x1 = u32(min3(max_x2[0], max_x2[1], max_x2[2])); + + var w = w_row + w_x * f32(x0); + var z = z_row + z_x * f32(x0); + x0 += min_x; + x1 += min_x; + + // Iterate scanline X interval + for (var x = x0; x <= x1; x++) { + // Check if point at pixel is within triangle (TODO: this shouldn't be needed, but there's bugs without it) + if min3(w[0], w[1], w[2]) >= 0.0 { + write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids); + } + + // Increment edge functions along the X-axis + w += w_x; + z += z_x; + } + + // Increment edge functions along the Y-axis + w_row += w_y; + z_row += z_y; + frag_coord_1d_row += view_width; + } + } else { + // Iterate over every pixel in the triangle's bounding box + for (var y = min_y; y <= max_y; y++) { + var w = w_row; + var z = z_row; + + for (var x = min_x; x <= max_x; x++) { + // Check if point at pixel is within triangle + if min3(w[0], w[1], w[2]) >= 0.0 { + write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids); + } + + // Increment edge functions along the X-axis + w += w_x; + z += z_x; + } + + // Increment edge functions along the Y-axis + w_row += w_y; + z_row += z_y; + frag_coord_1d_row += view_width; + } + } +} + +fn write_visibility_buffer_pixel(frag_coord_1d: u32, z: f32, packed_ids: u32) { +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let depth = bitcast(z); + let visibility = (u64(depth) << 32u) | u64(packed_ids); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], visibility); +#else ifdef DEPTH_CLAMP_ORTHO + let depth = bitcast(1.0 / z); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); +#else + let depth = bitcast(z); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); +#endif +} + +fn edge_function(a: vec2, b: vec2, c: vec2) -> f32 { + return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x); +} + +fn min3(a: f32, b: f32, c: f32) -> f32 { + return min(a, min(b, c)); +} + +fn max3(a: f32, b: f32, c: f32) -> f32 { + return max(a, max(b, c)); +} diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index a8ca69a41f..e545e3df3b 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -31,7 +31,7 @@ use bevy_utils::tracing::error; #[cfg(feature = "meshlet")] use crate::meshlet::{ - prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, MeshletGpuScene, + prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, InstanceManager, MeshletMesh, }; use crate::*; @@ -186,7 +186,7 @@ where .in_set(RenderSet::QueueMeshes) .after(prepare_assets::>) .before(queue_material_meshlet_meshes::) - .run_if(resource_exists::), + .run_if(resource_exists::), ); } } diff --git a/crates/bevy_render/src/render_resource/mod.rs b/crates/bevy_render/src/render_resource/mod.rs index 4b914ef413..42aed36339 100644 --- a/crates/bevy_render/src/render_resource/mod.rs +++ b/crates/bevy_render/src/render_resource/mod.rs @@ -32,7 +32,10 @@ pub use uniform_buffer::*; // TODO: decide where re-exports should go pub use wgpu::{ - util::{BufferInitDescriptor, DrawIndexedIndirectArgs, DrawIndirectArgs, TextureDataOrder}, + util::{ + BufferInitDescriptor, DispatchIndirectArgs, DrawIndexedIndirectArgs, DrawIndirectArgs, + TextureDataOrder, + }, AdapterInfo as WgpuAdapterInfo, AddressMode, AstcBlock, AstcChannel, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, BindingResource, BindingType, BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress, BufferAsyncError, diff --git a/examples/3d/meshlet.rs b/examples/3d/meshlet.rs index 4721e0c981..20264b43a1 100644 --- a/examples/3d/meshlet.rs +++ b/examples/3d/meshlet.rs @@ -17,7 +17,7 @@ use camera_controller::{CameraController, CameraControllerPlugin}; use std::{f32::consts::PI, path::Path, process::ExitCode}; const ASSET_URL: &str = - "https://raw.githubusercontent.com/JMS55/bevy_meshlet_asset/b6c712cfc87c65de419f856845401aba336a7bcd/bunny.meshlet_mesh"; + "https://raw.githubusercontent.com/JMS55/bevy_meshlet_asset/10bb5471c7beedfe63ad1cf269599c92b0f10aa2/bunny.meshlet_mesh"; fn main() -> ExitCode { if !Path::new("./assets/models/bunny.meshlet_mesh").exists() { @@ -29,7 +29,9 @@ fn main() -> ExitCode { .insert_resource(DirectionalLightShadowMap { size: 4096 }) .add_plugins(( DefaultPlugins, - MeshletPlugin, + MeshletPlugin { + cluster_buffer_slots: 8192, + }, MaterialPlugin::::default(), CameraControllerPlugin, ))