mirror of
https://github.com/bevyengine/bevy
synced 2024-11-24 21:53:07 +00:00
Meshlet fill cluster buffers rewritten (#15955)
# Objective - Make the meshlet fill cluster buffers pass slightly faster - Address https://github.com/bevyengine/bevy/issues/15920 for meshlets - Added PreviousGlobalTransform as a required meshlet component to avoid extra archetype moves, slightly alleviating https://github.com/bevyengine/bevy/issues/14681 for meshlets - Enforce that MeshletPlugin::cluster_buffer_slots is not greater than 2^25 (glitches will occur otherwise). Technically this field controls post-lod/culling cluster count, and the issue is on pre-lod/culling cluster count, but it's still valid now, and in the future this will be more true. Needs to be merged after https://github.com/bevyengine/bevy/pull/15846 and https://github.com/bevyengine/bevy/pull/15886 ## Solution - Old pass dispatched a thread per cluster, and did a binary search over the instances to find which instance the cluster belongs to, and what meshlet index within the instance it is. - New pass dispatches a workgroup per instance, and has the workgroup loop over all meshlets in the instance in order to write out the cluster data. - Use a push constant instead of arrayLength to fix the linked bug - Remap 1d->2d dispatch for software raster only if actually needed to save on spawning excess workgroups ## Testing - Did you test these changes? If so, how? - Ran the meshlet example, and an example with 1041 instances of 32217 meshlets per instance. Profiled the second scene with nsight, went from 0.55ms -> 0.40ms. Small savings. We're pretty much VRAM bandwidth bound at this point. - How can other people (reviewers) test your changes? Is there anything specific they need to know? - Run the meshlet example ## Changelog (non-meshlets) - PreviousGlobalTransform now implements the Default trait
This commit is contained in:
parent
6d42830c7f
commit
3fb6cefb2f
11 changed files with 139 additions and 82 deletions
|
@ -13,7 +13,7 @@
|
|||
meshlet_software_raster_indirect_args,
|
||||
meshlet_hardware_raster_indirect_args,
|
||||
meshlet_raster_clusters,
|
||||
meshlet_raster_cluster_rightmost_slot,
|
||||
constants,
|
||||
MeshletBoundingSphere,
|
||||
}
|
||||
#import bevy_render::maths::affine3_to_square
|
||||
|
@ -32,7 +32,7 @@ fn cull_clusters(
|
|||
) {
|
||||
// Calculate the cluster ID for this thread
|
||||
let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u));
|
||||
if cluster_id >= arrayLength(&meshlet_cluster_meshlet_ids) { return; }
|
||||
if cluster_id >= constants.scene_cluster_count { return; }
|
||||
|
||||
#ifdef MESHLET_SECOND_CULLING_PASS
|
||||
if !cluster_is_second_pass_candidate(cluster_id) { return; }
|
||||
|
@ -138,7 +138,7 @@ fn cull_clusters(
|
|||
} else {
|
||||
// Append this cluster to the list for hardware rasterization
|
||||
buffer_slot = atomicAdd(&meshlet_hardware_raster_indirect_args.instance_count, 1u);
|
||||
buffer_slot = meshlet_raster_cluster_rightmost_slot - buffer_slot;
|
||||
buffer_slot = constants.meshlet_raster_cluster_rightmost_slot - buffer_slot;
|
||||
}
|
||||
meshlet_raster_clusters[buffer_slot] = cluster_id;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#import bevy_pbr::meshlet_bindings::{
|
||||
cluster_count,
|
||||
meshlet_instance_meshlet_counts_prefix_sum,
|
||||
scene_instance_count,
|
||||
meshlet_global_cluster_count,
|
||||
meshlet_instance_meshlet_counts,
|
||||
meshlet_instance_meshlet_slice_starts,
|
||||
meshlet_cluster_instance_ids,
|
||||
meshlet_cluster_meshlet_ids,
|
||||
|
@ -8,37 +9,42 @@
|
|||
|
||||
/// Writes out instance_id and meshlet_id to the global buffers for each cluster in the scene.
|
||||
|
||||
var<workgroup> cluster_slice_start_workgroup: u32;
|
||||
|
||||
@compute
|
||||
@workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 cluster per thread
|
||||
@workgroup_size(1024, 1, 1) // 1024 threads per workgroup, 1 instance per workgroup
|
||||
fn fill_cluster_buffers(
|
||||
@builtin(workgroup_id) workgroup_id: vec3<u32>,
|
||||
@builtin(num_workgroups) num_workgroups: vec3<u32>,
|
||||
@builtin(local_invocation_index) local_invocation_index: u32,
|
||||
) {
|
||||
// Calculate the cluster ID for this thread
|
||||
let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u));
|
||||
if cluster_id >= cluster_count { return; } // TODO: Could be an arrayLength?
|
||||
// Calculate the instance ID for this workgroup
|
||||
var instance_id = workgroup_id.x + (workgroup_id.y * num_workgroups.x);
|
||||
if instance_id >= scene_instance_count { return; }
|
||||
|
||||
// Binary search to find the instance this cluster belongs to
|
||||
var left = 0u;
|
||||
var right = arrayLength(&meshlet_instance_meshlet_counts_prefix_sum) - 1u;
|
||||
while left <= right {
|
||||
let mid = (left + right) / 2u;
|
||||
if meshlet_instance_meshlet_counts_prefix_sum[mid] <= cluster_id {
|
||||
left = mid + 1u;
|
||||
} else {
|
||||
right = mid - 1u;
|
||||
}
|
||||
let instance_meshlet_count = meshlet_instance_meshlet_counts[instance_id];
|
||||
let instance_meshlet_slice_start = meshlet_instance_meshlet_slice_starts[instance_id];
|
||||
|
||||
// Reserve cluster slots for the instance and broadcast to the workgroup
|
||||
if local_invocation_index == 0u {
|
||||
cluster_slice_start_workgroup = atomicAdd(&meshlet_global_cluster_count, instance_meshlet_count);
|
||||
}
|
||||
let instance_id = right;
|
||||
let cluster_slice_start = workgroupUniformLoad(&cluster_slice_start_workgroup);
|
||||
|
||||
// Find the meshlet ID for this cluster within the instance's MeshletMesh
|
||||
let meshlet_id_local = cluster_id - meshlet_instance_meshlet_counts_prefix_sum[instance_id];
|
||||
// Loop enough times to write out all the meshlets for the instance given that each thread writes 1 meshlet in each iteration
|
||||
for (var clusters_written = 0u; clusters_written < instance_meshlet_count; clusters_written += 1024u) {
|
||||
// Calculate meshlet ID within this instance's MeshletMesh to process for this thread
|
||||
let meshlet_id_local = clusters_written + local_invocation_index;
|
||||
if meshlet_id_local >= instance_meshlet_count { return; }
|
||||
|
||||
// Find the overall meshlet ID in the global meshlet buffer
|
||||
let meshlet_id = meshlet_id_local + meshlet_instance_meshlet_slice_starts[instance_id];
|
||||
// Find the overall cluster ID in the global cluster buffer
|
||||
let cluster_id = cluster_slice_start + meshlet_id_local;
|
||||
|
||||
// Write results to buffers
|
||||
meshlet_cluster_instance_ids[cluster_id] = instance_id;
|
||||
meshlet_cluster_meshlet_ids[cluster_id] = meshlet_id;
|
||||
// Find the overall meshlet ID in the global meshlet buffer
|
||||
let meshlet_id = instance_meshlet_slice_start + meshlet_id_local;
|
||||
|
||||
// Write results to buffers
|
||||
meshlet_cluster_instance_ids[cluster_id] = instance_id;
|
||||
meshlet_cluster_meshlet_ids[cluster_id] = meshlet_id;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,8 +10,9 @@ use bevy_ecs::{
|
|||
query::Has,
|
||||
system::{Local, Query, Res, ResMut, Resource, SystemState},
|
||||
};
|
||||
use bevy_render::sync_world::MainEntity;
|
||||
use bevy_render::{render_resource::StorageBuffer, view::RenderLayers, MainWorld};
|
||||
use bevy_render::{
|
||||
render_resource::StorageBuffer, sync_world::MainEntity, view::RenderLayers, MainWorld,
|
||||
};
|
||||
use bevy_transform::components::GlobalTransform;
|
||||
use bevy_utils::{HashMap, HashSet};
|
||||
use core::ops::{DerefMut, Range};
|
||||
|
@ -19,33 +20,36 @@ use core::ops::{DerefMut, Range};
|
|||
/// Manages data for each entity with a [`MeshletMesh`].
|
||||
#[derive(Resource)]
|
||||
pub struct InstanceManager {
|
||||
/// Amount of clusters in the scene (sum of all meshlet counts across all instances)
|
||||
/// Amount of instances in the scene.
|
||||
pub scene_instance_count: u32,
|
||||
/// Amount of clusters in the scene.
|
||||
pub scene_cluster_count: u32,
|
||||
|
||||
/// Per-instance [`MainEntity`], [`RenderLayers`], and [`NotShadowCaster`]
|
||||
/// Per-instance [`MainEntity`], [`RenderLayers`], and [`NotShadowCaster`].
|
||||
pub instances: Vec<(MainEntity, RenderLayers, bool)>,
|
||||
/// Per-instance [`MeshUniform`]
|
||||
/// Per-instance [`MeshUniform`].
|
||||
pub instance_uniforms: StorageBuffer<Vec<MeshUniform>>,
|
||||
/// Per-instance material ID
|
||||
/// Per-instance material ID.
|
||||
pub instance_material_ids: StorageBuffer<Vec<u32>>,
|
||||
/// Prefix-sum of meshlet counts per instance
|
||||
pub instance_meshlet_counts_prefix_sum: StorageBuffer<Vec<u32>>,
|
||||
/// Per-instance index to the start of the instance's slice of the meshlets buffer
|
||||
/// Per-instance count of meshlets in the instance's [`MeshletMesh`].
|
||||
pub instance_meshlet_counts: StorageBuffer<Vec<u32>>,
|
||||
/// Per-instance index to the start of the instance's slice of the meshlets buffer.
|
||||
pub instance_meshlet_slice_starts: StorageBuffer<Vec<u32>>,
|
||||
/// Per-view per-instance visibility bit. Used for [`RenderLayers`] and [`NotShadowCaster`] support.
|
||||
pub view_instance_visibility: EntityHashMap<StorageBuffer<Vec<u32>>>,
|
||||
|
||||
/// Next material ID available for a [`Material`]
|
||||
/// Next material ID available for a [`Material`].
|
||||
next_material_id: u32,
|
||||
/// Map of [`Material`] to material ID
|
||||
/// Map of [`Material`] to material ID.
|
||||
material_id_lookup: HashMap<UntypedAssetId, u32>,
|
||||
/// Set of material IDs used in the scene
|
||||
/// Set of material IDs used in the scene.
|
||||
material_ids_present_in_scene: HashSet<u32>,
|
||||
}
|
||||
|
||||
impl InstanceManager {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
scene_instance_count: 0,
|
||||
scene_cluster_count: 0,
|
||||
|
||||
instances: Vec::new(),
|
||||
|
@ -59,9 +63,9 @@ impl InstanceManager {
|
|||
buffer.set_label(Some("meshlet_instance_material_ids"));
|
||||
buffer
|
||||
},
|
||||
instance_meshlet_counts_prefix_sum: {
|
||||
instance_meshlet_counts: {
|
||||
let mut buffer = StorageBuffer::default();
|
||||
buffer.set_label(Some("meshlet_instance_meshlet_counts_prefix_sum"));
|
||||
buffer.set_label(Some("meshlet_instance_meshlet_counts"));
|
||||
buffer
|
||||
},
|
||||
instance_meshlet_slice_starts: {
|
||||
|
@ -80,7 +84,7 @@ impl InstanceManager {
|
|||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn add_instance(
|
||||
&mut self,
|
||||
instance: Entity,
|
||||
instance: MainEntity,
|
||||
meshlets_slice: Range<u32>,
|
||||
transform: &GlobalTransform,
|
||||
previous_transform: Option<&PreviousGlobalTransform>,
|
||||
|
@ -108,20 +112,21 @@ impl InstanceManager {
|
|||
|
||||
// Append instance data
|
||||
self.instances.push((
|
||||
instance.into(),
|
||||
instance,
|
||||
render_layers.cloned().unwrap_or(RenderLayers::default()),
|
||||
not_shadow_caster,
|
||||
));
|
||||
self.instance_uniforms.get_mut().push(mesh_uniform);
|
||||
self.instance_material_ids.get_mut().push(0);
|
||||
self.instance_meshlet_counts_prefix_sum
|
||||
self.instance_meshlet_counts
|
||||
.get_mut()
|
||||
.push(self.scene_cluster_count);
|
||||
.push(meshlets_slice.len() as u32);
|
||||
self.instance_meshlet_slice_starts
|
||||
.get_mut()
|
||||
.push(meshlets_slice.start);
|
||||
|
||||
self.scene_cluster_count += meshlets_slice.end - meshlets_slice.start;
|
||||
self.scene_instance_count += 1;
|
||||
self.scene_cluster_count += meshlets_slice.len() as u32;
|
||||
}
|
||||
|
||||
/// Get the material ID for a [`crate::Material`].
|
||||
|
@ -140,12 +145,13 @@ impl InstanceManager {
|
|||
}
|
||||
|
||||
pub fn reset(&mut self, entities: &Entities) {
|
||||
self.scene_instance_count = 0;
|
||||
self.scene_cluster_count = 0;
|
||||
|
||||
self.instances.clear();
|
||||
self.instance_uniforms.get_mut().clear();
|
||||
self.instance_material_ids.get_mut().clear();
|
||||
self.instance_meshlet_counts_prefix_sum.get_mut().clear();
|
||||
self.instance_meshlet_counts.get_mut().clear();
|
||||
self.instance_meshlet_slice_starts.get_mut().clear();
|
||||
self.view_instance_visibility
|
||||
.retain(|view_entity, _| entities.contains(*view_entity));
|
||||
|
@ -227,7 +233,7 @@ pub fn extract_meshlet_mesh_entities(
|
|||
|
||||
// Add the instance's data to the instance manager
|
||||
instance_manager.add_instance(
|
||||
instance,
|
||||
instance.into(),
|
||||
meshlets_slice,
|
||||
transform,
|
||||
previous_transform,
|
||||
|
|
|
@ -51,15 +51,17 @@ struct DrawIndirectArgs {
|
|||
const CENTIMETERS_PER_METER = 100.0;
|
||||
|
||||
#ifdef MESHLET_FILL_CLUSTER_BUFFERS_PASS
|
||||
var<push_constant> cluster_count: u32;
|
||||
@group(0) @binding(0) var<storage, read> meshlet_instance_meshlet_counts_prefix_sum: array<u32>; // Per entity instance
|
||||
var<push_constant> scene_instance_count: u32;
|
||||
@group(0) @binding(0) var<storage, read> meshlet_instance_meshlet_counts: array<u32>; // Per entity instance
|
||||
@group(0) @binding(1) var<storage, read> meshlet_instance_meshlet_slice_starts: array<u32>; // Per entity instance
|
||||
@group(0) @binding(2) var<storage, read_write> meshlet_cluster_instance_ids: array<u32>; // Per cluster
|
||||
@group(0) @binding(3) var<storage, read_write> meshlet_cluster_meshlet_ids: array<u32>; // Per cluster
|
||||
@group(0) @binding(4) var<storage, read_write> meshlet_global_cluster_count: atomic<u32>; // Single object shared between all workgroups
|
||||
#endif
|
||||
|
||||
#ifdef MESHLET_CULLING_PASS
|
||||
var<push_constant> meshlet_raster_cluster_rightmost_slot: u32;
|
||||
struct Constants { scene_cluster_count: u32, meshlet_raster_cluster_rightmost_slot: u32 }
|
||||
var<push_constant> constants: Constants;
|
||||
@group(0) @binding(0) var<storage, read> meshlet_cluster_meshlet_ids: array<u32>; // Per cluster
|
||||
@group(0) @binding(1) var<storage, read> meshlet_bounding_spheres: array<MeshletBoundingSpheres>; // Per meshlet
|
||||
@group(0) @binding(2) var<storage, read> meshlet_simplification_errors: array<u32>; // Per meshlet
|
||||
|
@ -67,9 +69,9 @@ var<push_constant> meshlet_raster_cluster_rightmost_slot: u32;
|
|||
@group(0) @binding(4) var<storage, read> meshlet_instance_uniforms: array<Mesh>; // Per entity instance
|
||||
@group(0) @binding(5) var<storage, read> meshlet_view_instance_visibility: array<u32>; // 1 bit per entity instance, packed as a bitmask
|
||||
@group(0) @binding(6) var<storage, read_write> meshlet_second_pass_candidates: array<atomic<u32>>; // 1 bit per cluster , packed as a bitmask
|
||||
@group(0) @binding(7) var<storage, read_write> meshlet_software_raster_indirect_args: DispatchIndirectArgs; // Single object shared between all workgroups/clusters/triangles
|
||||
@group(0) @binding(8) var<storage, read_write> meshlet_hardware_raster_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/clusters/triangles
|
||||
@group(0) @binding(9) var<storage, read_write> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups/clusters/triangles
|
||||
@group(0) @binding(7) var<storage, read_write> meshlet_software_raster_indirect_args: DispatchIndirectArgs; // Single object shared between all workgroups
|
||||
@group(0) @binding(8) var<storage, read_write> meshlet_hardware_raster_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups
|
||||
@group(0) @binding(9) var<storage, read_write> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups
|
||||
@group(0) @binding(10) var depth_pyramid: texture_2d<f32>; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass
|
||||
@group(0) @binding(11) var<uniform> view: View;
|
||||
@group(0) @binding(12) var<uniform> previous_view: PreviousViewUniforms;
|
||||
|
@ -95,7 +97,7 @@ fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool {
|
|||
@group(0) @binding(3) var<storage, read> meshlet_vertex_positions: array<u32>; // Many per meshlet
|
||||
@group(0) @binding(4) var<storage, read> meshlet_cluster_instance_ids: array<u32>; // Per cluster
|
||||
@group(0) @binding(5) var<storage, read> meshlet_instance_uniforms: array<Mesh>; // Per entity instance
|
||||
@group(0) @binding(6) var<storage, read> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups/clusters/triangles
|
||||
@group(0) @binding(6) var<storage, read> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups
|
||||
@group(0) @binding(7) var<storage, read> meshlet_software_raster_cluster_count: u32;
|
||||
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
|
||||
@group(0) @binding(8) var<storage, read_write> meshlet_visibility_buffer: array<atomic<u64>>; // Per pixel
|
||||
|
|
|
@ -56,7 +56,7 @@ use self::{
|
|||
},
|
||||
visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode,
|
||||
};
|
||||
use crate::{graph::NodePbr, Material, MeshMaterial3d};
|
||||
use crate::{graph::NodePbr, Material, MeshMaterial3d, PreviousGlobalTransform};
|
||||
use bevy_app::{App, Plugin, PostUpdate};
|
||||
use bevy_asset::{load_internal_asset, AssetApp, AssetId, Handle};
|
||||
use bevy_core_pipeline::{
|
||||
|
@ -129,6 +129,8 @@ pub struct MeshletPlugin {
|
|||
/// If this number is too low, you'll see rendering artifacts like missing or blinking meshes.
|
||||
///
|
||||
/// Each cluster slot costs 4 bytes of VRAM.
|
||||
///
|
||||
/// Must not be greater than 2^25.
|
||||
pub cluster_buffer_slots: u32,
|
||||
}
|
||||
|
||||
|
@ -147,6 +149,11 @@ impl Plugin for MeshletPlugin {
|
|||
#[cfg(target_endian = "big")]
|
||||
compile_error!("MeshletPlugin is only supported on little-endian processors.");
|
||||
|
||||
if self.cluster_buffer_slots > 2_u32.pow(25) {
|
||||
error!("MeshletPlugin::cluster_buffer_slots must not be greater than 2^25.");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
load_internal_asset!(
|
||||
app,
|
||||
MESHLET_BINDINGS_SHADER_HANDLE,
|
||||
|
@ -293,7 +300,7 @@ impl Plugin for MeshletPlugin {
|
|||
/// The meshlet mesh equivalent of [`bevy_render::mesh::Mesh3d`].
|
||||
#[derive(Component, Clone, Debug, Default, Deref, DerefMut, Reflect, PartialEq, Eq, From)]
|
||||
#[reflect(Component, Default)]
|
||||
#[require(Transform, Visibility)]
|
||||
#[require(Transform, PreviousGlobalTransform, Visibility)]
|
||||
pub struct MeshletMesh3d(pub Handle<MeshletMesh>);
|
||||
|
||||
impl From<MeshletMesh3d> for AssetId<MeshletMesh> {
|
||||
|
|
|
@ -84,7 +84,7 @@ impl FromWorld for MeshletPipelines {
|
|||
layout: vec![cull_layout.clone()],
|
||||
push_constant_ranges: vec![PushConstantRange {
|
||||
stages: ShaderStages::COMPUTE,
|
||||
range: 0..4,
|
||||
range: 0..8,
|
||||
}],
|
||||
shader: MESHLET_CULLING_SHADER_HANDLE,
|
||||
shader_defs: vec![
|
||||
|
@ -99,7 +99,7 @@ impl FromWorld for MeshletPipelines {
|
|||
layout: vec![cull_layout],
|
||||
push_constant_ranges: vec![PushConstantRange {
|
||||
stages: ShaderStages::COMPUTE,
|
||||
range: 0..4,
|
||||
range: 0..8,
|
||||
}],
|
||||
shader: MESHLET_CULLING_SHADER_HANDLE,
|
||||
shader_defs: vec![
|
||||
|
@ -441,7 +441,10 @@ impl FromWorld for MeshletPipelines {
|
|||
pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor {
|
||||
label: Some("meshlet_remap_1d_to_2d_dispatch_pipeline".into()),
|
||||
layout: vec![layout],
|
||||
push_constant_ranges: vec![],
|
||||
push_constant_ranges: vec![PushConstantRange {
|
||||
stages: ShaderStages::COMPUTE,
|
||||
range: 0..4,
|
||||
}],
|
||||
shader: MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE,
|
||||
shader_defs: vec![],
|
||||
entry_point: "remap_dispatch".into(),
|
||||
|
|
|
@ -8,13 +8,16 @@ struct DispatchIndirectArgs {
|
|||
|
||||
@group(0) @binding(0) var<storage, read_write> meshlet_software_raster_indirect_args: DispatchIndirectArgs;
|
||||
@group(0) @binding(1) var<storage, read_write> meshlet_software_raster_cluster_count: u32;
|
||||
var<push_constant> max_compute_workgroups_per_dimension: u32;
|
||||
|
||||
@compute
|
||||
@workgroup_size(1, 1, 1)
|
||||
fn remap_dispatch() {
|
||||
meshlet_software_raster_cluster_count = meshlet_software_raster_indirect_args.x;
|
||||
|
||||
let n = u32(ceil(sqrt(f32(meshlet_software_raster_indirect_args.x))));
|
||||
meshlet_software_raster_indirect_args.x = n;
|
||||
meshlet_software_raster_indirect_args.y = n;
|
||||
if meshlet_software_raster_cluster_count > max_compute_workgroups_per_dimension {
|
||||
let n = u32(ceil(sqrt(f32(meshlet_software_raster_cluster_count))));
|
||||
meshlet_software_raster_indirect_args.x = n;
|
||||
meshlet_software_raster_indirect_args.y = n;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,6 +122,7 @@ impl ResourceManager {
|
|||
storage_buffer_read_only_sized(false, None),
|
||||
storage_buffer_sized(false, None),
|
||||
storage_buffer_sized(false, None),
|
||||
storage_buffer_sized(false, None),
|
||||
),
|
||||
),
|
||||
),
|
||||
|
@ -246,6 +247,7 @@ impl ResourceManager {
|
|||
|
||||
#[derive(Component)]
|
||||
pub struct MeshletViewResources {
|
||||
pub scene_instance_count: u32,
|
||||
pub scene_cluster_count: u32,
|
||||
pub second_pass_candidates_buffer: Buffer,
|
||||
instance_visibility: Buffer,
|
||||
|
@ -330,7 +332,7 @@ pub fn prepare_meshlet_per_frame_resources(
|
|||
&render_queue,
|
||||
);
|
||||
upload_storage_buffer(
|
||||
&mut instance_manager.instance_meshlet_counts_prefix_sum,
|
||||
&mut instance_manager.instance_meshlet_counts,
|
||||
&render_device,
|
||||
&render_queue,
|
||||
);
|
||||
|
@ -340,9 +342,6 @@ pub fn prepare_meshlet_per_frame_resources(
|
|||
&render_queue,
|
||||
);
|
||||
|
||||
// Early submission for GPU data uploads to start while the render graph records commands
|
||||
render_queue.submit([]);
|
||||
|
||||
let needed_buffer_size = 4 * instance_manager.scene_cluster_count as u64;
|
||||
match &mut resource_manager.cluster_instance_ids {
|
||||
Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(),
|
||||
|
@ -553,6 +552,7 @@ pub fn prepare_meshlet_per_frame_resources(
|
|||
};
|
||||
|
||||
commands.entity(view_entity).insert(MeshletViewResources {
|
||||
scene_instance_count: instance_manager.scene_instance_count,
|
||||
scene_cluster_count: instance_manager.scene_cluster_count,
|
||||
second_pass_candidates_buffer,
|
||||
instance_visibility,
|
||||
|
@ -602,19 +602,25 @@ pub fn prepare_meshlet_view_bind_groups(
|
|||
|
||||
let first_node = Arc::new(AtomicBool::new(true));
|
||||
|
||||
let fill_cluster_buffers_global_cluster_count =
|
||||
render_device.create_buffer(&BufferDescriptor {
|
||||
label: Some("meshlet_fill_cluster_buffers_global_cluster_count"),
|
||||
size: 4,
|
||||
usage: BufferUsages::STORAGE,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
||||
// TODO: Some of these bind groups can be reused across multiple views
|
||||
for (view_entity, view_resources) in &views {
|
||||
let entries = BindGroupEntries::sequential((
|
||||
instance_manager
|
||||
.instance_meshlet_counts_prefix_sum
|
||||
.binding()
|
||||
.unwrap(),
|
||||
instance_manager.instance_meshlet_counts.binding().unwrap(),
|
||||
instance_manager
|
||||
.instance_meshlet_slice_starts
|
||||
.binding()
|
||||
.unwrap(),
|
||||
cluster_instance_ids.as_entire_binding(),
|
||||
cluster_meshlet_ids.as_entire_binding(),
|
||||
fill_cluster_buffers_global_cluster_count.as_entire_binding(),
|
||||
));
|
||||
let fill_cluster_buffers = render_device.create_bind_group(
|
||||
"meshlet_fill_cluster_buffers",
|
||||
|
|
|
@ -118,8 +118,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
render_context,
|
||||
&meshlet_view_bind_groups.fill_cluster_buffers,
|
||||
fill_cluster_buffers_pipeline,
|
||||
thread_per_cluster_workgroups,
|
||||
meshlet_view_resources.scene_cluster_count,
|
||||
meshlet_view_resources.scene_instance_count,
|
||||
);
|
||||
}
|
||||
cull_pass(
|
||||
|
@ -130,6 +129,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
previous_view_offset,
|
||||
culling_first_pipeline,
|
||||
thread_per_cluster_workgroups,
|
||||
meshlet_view_resources.scene_cluster_count,
|
||||
meshlet_view_resources.raster_cluster_rightmost_slot,
|
||||
meshlet_view_bind_groups
|
||||
.remap_1d_to_2d_dispatch
|
||||
|
@ -165,6 +165,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
previous_view_offset,
|
||||
culling_second_pipeline,
|
||||
thread_per_cluster_workgroups,
|
||||
meshlet_view_resources.scene_cluster_count,
|
||||
meshlet_view_resources.raster_cluster_rightmost_slot,
|
||||
meshlet_view_bind_groups
|
||||
.remap_1d_to_2d_dispatch
|
||||
|
@ -253,6 +254,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
previous_view_offset,
|
||||
culling_first_pipeline,
|
||||
thread_per_cluster_workgroups,
|
||||
meshlet_view_resources.scene_cluster_count,
|
||||
meshlet_view_resources.raster_cluster_rightmost_slot,
|
||||
meshlet_view_bind_groups
|
||||
.remap_1d_to_2d_dispatch
|
||||
|
@ -288,6 +290,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
previous_view_offset,
|
||||
culling_second_pipeline,
|
||||
thread_per_cluster_workgroups,
|
||||
meshlet_view_resources.scene_cluster_count,
|
||||
meshlet_view_resources.raster_cluster_rightmost_slot,
|
||||
meshlet_view_bind_groups
|
||||
.remap_1d_to_2d_dispatch
|
||||
|
@ -334,21 +337,32 @@ fn fill_cluster_buffers_pass(
|
|||
render_context: &mut RenderContext,
|
||||
fill_cluster_buffers_bind_group: &BindGroup,
|
||||
fill_cluster_buffers_pass_pipeline: &ComputePipeline,
|
||||
fill_cluster_buffers_pass_workgroups: u32,
|
||||
cluster_count: u32,
|
||||
scene_instance_count: u32,
|
||||
) {
|
||||
let mut fill_cluster_buffers_pass_workgroups_x = scene_instance_count;
|
||||
let mut fill_cluster_buffers_pass_workgroups_y = 1;
|
||||
if scene_instance_count
|
||||
> render_context
|
||||
.render_device()
|
||||
.limits()
|
||||
.max_compute_workgroups_per_dimension
|
||||
{
|
||||
fill_cluster_buffers_pass_workgroups_x = (scene_instance_count as f32).sqrt().ceil() as u32;
|
||||
fill_cluster_buffers_pass_workgroups_y = fill_cluster_buffers_pass_workgroups_x;
|
||||
}
|
||||
|
||||
let command_encoder = render_context.command_encoder();
|
||||
let mut fill_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
|
||||
label: Some("fill_cluster_buffers"),
|
||||
timestamp_writes: None,
|
||||
});
|
||||
fill_pass.set_pipeline(fill_cluster_buffers_pass_pipeline);
|
||||
fill_pass.set_push_constants(0, &cluster_count.to_le_bytes());
|
||||
fill_pass.set_push_constants(0, &scene_instance_count.to_le_bytes());
|
||||
fill_pass.set_bind_group(0, fill_cluster_buffers_bind_group, &[]);
|
||||
fill_pass.dispatch_workgroups(
|
||||
fill_cluster_buffers_pass_workgroups,
|
||||
fill_cluster_buffers_pass_workgroups,
|
||||
fill_cluster_buffers_pass_workgroups,
|
||||
fill_cluster_buffers_pass_workgroups_x,
|
||||
fill_cluster_buffers_pass_workgroups_y,
|
||||
1,
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -361,17 +375,26 @@ fn cull_pass(
|
|||
previous_view_offset: &PreviousViewUniformOffset,
|
||||
culling_pipeline: &ComputePipeline,
|
||||
culling_workgroups: u32,
|
||||
scene_cluster_count: u32,
|
||||
raster_cluster_rightmost_slot: u32,
|
||||
remap_1d_to_2d_dispatch_bind_group: Option<&BindGroup>,
|
||||
remap_1d_to_2d_dispatch_pipeline: Option<&ComputePipeline>,
|
||||
) {
|
||||
let max_compute_workgroups_per_dimension = render_context
|
||||
.render_device()
|
||||
.limits()
|
||||
.max_compute_workgroups_per_dimension;
|
||||
|
||||
let command_encoder = render_context.command_encoder();
|
||||
let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
|
||||
label: Some(label),
|
||||
timestamp_writes: None,
|
||||
});
|
||||
cull_pass.set_pipeline(culling_pipeline);
|
||||
cull_pass.set_push_constants(0, &raster_cluster_rightmost_slot.to_le_bytes());
|
||||
cull_pass.set_push_constants(
|
||||
0,
|
||||
bytemuck::cast_slice(&[scene_cluster_count, raster_cluster_rightmost_slot]),
|
||||
);
|
||||
cull_pass.set_bind_group(
|
||||
0,
|
||||
culling_bind_group,
|
||||
|
@ -384,6 +407,7 @@ fn cull_pass(
|
|||
remap_1d_to_2d_dispatch_bind_group,
|
||||
) {
|
||||
cull_pass.set_pipeline(remap_1d_to_2d_dispatch_pipeline);
|
||||
cull_pass.set_push_constants(0, &max_compute_workgroups_per_dimension.to_be_bytes());
|
||||
cull_pass.set_bind_group(0, remap_1d_to_2d_dispatch_bind_group, &[]);
|
||||
cull_pass.dispatch_workgroups(1, 1, 1);
|
||||
}
|
||||
|
|
|
@ -172,7 +172,7 @@ fn resolve_vertex_output(frag_coord: vec4<f32>) -> VertexOutput {
|
|||
ddy_uv,
|
||||
world_tangent,
|
||||
instance_uniform.flags,
|
||||
cluster_id,
|
||||
instance_id ^ meshlet_id,
|
||||
#ifdef PREPASS_FRAGMENT
|
||||
#ifdef MOTION_VECTOR_PREPASS
|
||||
motion_vector,
|
||||
|
|
|
@ -216,7 +216,7 @@ pub fn update_previous_view_data(
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Component)]
|
||||
#[derive(Component, Default)]
|
||||
pub struct PreviousGlobalTransform(pub Affine3A);
|
||||
|
||||
#[cfg(not(feature = "meshlet"))]
|
||||
|
|
Loading…
Reference in a new issue