Meshlet fill cluster buffers rewritten (#15955)

# Objective
- Make the meshlet fill cluster buffers pass slightly faster
- Address https://github.com/bevyengine/bevy/issues/15920 for meshlets
- Added PreviousGlobalTransform as a required meshlet component to avoid
extra archetype moves, slightly alleviating
https://github.com/bevyengine/bevy/issues/14681 for meshlets
- Enforce that MeshletPlugin::cluster_buffer_slots is not greater than
2^25 (glitches will occur otherwise). Technically this field controls
post-lod/culling cluster count, and the issue is on pre-lod/culling
cluster count, but it's still valid now, and in the future this will be
more true.

Needs to be merged after https://github.com/bevyengine/bevy/pull/15846
and https://github.com/bevyengine/bevy/pull/15886

## Solution

- Old pass dispatched a thread per cluster, and did a binary search over
the instances to find which instance the cluster belongs to, and what
meshlet index within the instance it is.
- New pass dispatches a workgroup per instance, and has the workgroup
loop over all meshlets in the instance in order to write out the cluster
data.
- Use a push constant instead of arrayLength to fix the linked bug
- Remap 1d->2d dispatch for software raster only if actually needed to
save on spawning excess workgroups

## Testing

- Did you test these changes? If so, how?
- Ran the meshlet example, and an example with 1041 instances of 32217
meshlets per instance. Profiled the second scene with nsight, went from
0.55ms -> 0.40ms. Small savings. We're pretty much VRAM bandwidth bound
at this point.
- How can other people (reviewers) test your changes? Is there anything
specific they need to know?
  - Run the meshlet example

## Changelog (non-meshlets)
- PreviousGlobalTransform now implements the Default trait
This commit is contained in:
JMS55 2024-10-23 12:18:49 -07:00 committed by GitHub
parent 6d42830c7f
commit 3fb6cefb2f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 139 additions and 82 deletions

View file

@ -13,7 +13,7 @@
meshlet_software_raster_indirect_args,
meshlet_hardware_raster_indirect_args,
meshlet_raster_clusters,
meshlet_raster_cluster_rightmost_slot,
constants,
MeshletBoundingSphere,
}
#import bevy_render::maths::affine3_to_square
@ -32,7 +32,7 @@ fn cull_clusters(
) {
// Calculate the cluster ID for this thread
let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u));
if cluster_id >= arrayLength(&meshlet_cluster_meshlet_ids) { return; }
if cluster_id >= constants.scene_cluster_count { return; }
#ifdef MESHLET_SECOND_CULLING_PASS
if !cluster_is_second_pass_candidate(cluster_id) { return; }
@ -138,7 +138,7 @@ fn cull_clusters(
} else {
// Append this cluster to the list for hardware rasterization
buffer_slot = atomicAdd(&meshlet_hardware_raster_indirect_args.instance_count, 1u);
buffer_slot = meshlet_raster_cluster_rightmost_slot - buffer_slot;
buffer_slot = constants.meshlet_raster_cluster_rightmost_slot - buffer_slot;
}
meshlet_raster_clusters[buffer_slot] = cluster_id;
}

View file

@ -1,6 +1,7 @@
#import bevy_pbr::meshlet_bindings::{
cluster_count,
meshlet_instance_meshlet_counts_prefix_sum,
scene_instance_count,
meshlet_global_cluster_count,
meshlet_instance_meshlet_counts,
meshlet_instance_meshlet_slice_starts,
meshlet_cluster_instance_ids,
meshlet_cluster_meshlet_ids,
@ -8,37 +9,42 @@
/// Writes out instance_id and meshlet_id to the global buffers for each cluster in the scene.
var<workgroup> cluster_slice_start_workgroup: u32;
@compute
@workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 cluster per thread
@workgroup_size(1024, 1, 1) // 1024 threads per workgroup, 1 instance per workgroup
fn fill_cluster_buffers(
@builtin(workgroup_id) workgroup_id: vec3<u32>,
@builtin(num_workgroups) num_workgroups: vec3<u32>,
@builtin(local_invocation_index) local_invocation_index: u32,
) {
// Calculate the cluster ID for this thread
let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u));
if cluster_id >= cluster_count { return; } // TODO: Could be an arrayLength?
// Calculate the instance ID for this workgroup
var instance_id = workgroup_id.x + (workgroup_id.y * num_workgroups.x);
if instance_id >= scene_instance_count { return; }
// Binary search to find the instance this cluster belongs to
var left = 0u;
var right = arrayLength(&meshlet_instance_meshlet_counts_prefix_sum) - 1u;
while left <= right {
let mid = (left + right) / 2u;
if meshlet_instance_meshlet_counts_prefix_sum[mid] <= cluster_id {
left = mid + 1u;
} else {
right = mid - 1u;
}
let instance_meshlet_count = meshlet_instance_meshlet_counts[instance_id];
let instance_meshlet_slice_start = meshlet_instance_meshlet_slice_starts[instance_id];
// Reserve cluster slots for the instance and broadcast to the workgroup
if local_invocation_index == 0u {
cluster_slice_start_workgroup = atomicAdd(&meshlet_global_cluster_count, instance_meshlet_count);
}
let instance_id = right;
let cluster_slice_start = workgroupUniformLoad(&cluster_slice_start_workgroup);
// Find the meshlet ID for this cluster within the instance's MeshletMesh
let meshlet_id_local = cluster_id - meshlet_instance_meshlet_counts_prefix_sum[instance_id];
// Loop enough times to write out all the meshlets for the instance given that each thread writes 1 meshlet in each iteration
for (var clusters_written = 0u; clusters_written < instance_meshlet_count; clusters_written += 1024u) {
// Calculate meshlet ID within this instance's MeshletMesh to process for this thread
let meshlet_id_local = clusters_written + local_invocation_index;
if meshlet_id_local >= instance_meshlet_count { return; }
// Find the overall meshlet ID in the global meshlet buffer
let meshlet_id = meshlet_id_local + meshlet_instance_meshlet_slice_starts[instance_id];
// Find the overall cluster ID in the global cluster buffer
let cluster_id = cluster_slice_start + meshlet_id_local;
// Write results to buffers
meshlet_cluster_instance_ids[cluster_id] = instance_id;
meshlet_cluster_meshlet_ids[cluster_id] = meshlet_id;
// Find the overall meshlet ID in the global meshlet buffer
let meshlet_id = instance_meshlet_slice_start + meshlet_id_local;
// Write results to buffers
meshlet_cluster_instance_ids[cluster_id] = instance_id;
meshlet_cluster_meshlet_ids[cluster_id] = meshlet_id;
}
}

View file

@ -10,8 +10,9 @@ use bevy_ecs::{
query::Has,
system::{Local, Query, Res, ResMut, Resource, SystemState},
};
use bevy_render::sync_world::MainEntity;
use bevy_render::{render_resource::StorageBuffer, view::RenderLayers, MainWorld};
use bevy_render::{
render_resource::StorageBuffer, sync_world::MainEntity, view::RenderLayers, MainWorld,
};
use bevy_transform::components::GlobalTransform;
use bevy_utils::{HashMap, HashSet};
use core::ops::{DerefMut, Range};
@ -19,33 +20,36 @@ use core::ops::{DerefMut, Range};
/// Manages data for each entity with a [`MeshletMesh`].
#[derive(Resource)]
pub struct InstanceManager {
/// Amount of clusters in the scene (sum of all meshlet counts across all instances)
/// Amount of instances in the scene.
pub scene_instance_count: u32,
/// Amount of clusters in the scene.
pub scene_cluster_count: u32,
/// Per-instance [`MainEntity`], [`RenderLayers`], and [`NotShadowCaster`]
/// Per-instance [`MainEntity`], [`RenderLayers`], and [`NotShadowCaster`].
pub instances: Vec<(MainEntity, RenderLayers, bool)>,
/// Per-instance [`MeshUniform`]
/// Per-instance [`MeshUniform`].
pub instance_uniforms: StorageBuffer<Vec<MeshUniform>>,
/// Per-instance material ID
/// Per-instance material ID.
pub instance_material_ids: StorageBuffer<Vec<u32>>,
/// Prefix-sum of meshlet counts per instance
pub instance_meshlet_counts_prefix_sum: StorageBuffer<Vec<u32>>,
/// Per-instance index to the start of the instance's slice of the meshlets buffer
/// Per-instance count of meshlets in the instance's [`MeshletMesh`].
pub instance_meshlet_counts: StorageBuffer<Vec<u32>>,
/// Per-instance index to the start of the instance's slice of the meshlets buffer.
pub instance_meshlet_slice_starts: StorageBuffer<Vec<u32>>,
/// Per-view per-instance visibility bit. Used for [`RenderLayers`] and [`NotShadowCaster`] support.
pub view_instance_visibility: EntityHashMap<StorageBuffer<Vec<u32>>>,
/// Next material ID available for a [`Material`]
/// Next material ID available for a [`Material`].
next_material_id: u32,
/// Map of [`Material`] to material ID
/// Map of [`Material`] to material ID.
material_id_lookup: HashMap<UntypedAssetId, u32>,
/// Set of material IDs used in the scene
/// Set of material IDs used in the scene.
material_ids_present_in_scene: HashSet<u32>,
}
impl InstanceManager {
pub fn new() -> Self {
Self {
scene_instance_count: 0,
scene_cluster_count: 0,
instances: Vec::new(),
@ -59,9 +63,9 @@ impl InstanceManager {
buffer.set_label(Some("meshlet_instance_material_ids"));
buffer
},
instance_meshlet_counts_prefix_sum: {
instance_meshlet_counts: {
let mut buffer = StorageBuffer::default();
buffer.set_label(Some("meshlet_instance_meshlet_counts_prefix_sum"));
buffer.set_label(Some("meshlet_instance_meshlet_counts"));
buffer
},
instance_meshlet_slice_starts: {
@ -80,7 +84,7 @@ impl InstanceManager {
#[allow(clippy::too_many_arguments)]
pub fn add_instance(
&mut self,
instance: Entity,
instance: MainEntity,
meshlets_slice: Range<u32>,
transform: &GlobalTransform,
previous_transform: Option<&PreviousGlobalTransform>,
@ -108,20 +112,21 @@ impl InstanceManager {
// Append instance data
self.instances.push((
instance.into(),
instance,
render_layers.cloned().unwrap_or(RenderLayers::default()),
not_shadow_caster,
));
self.instance_uniforms.get_mut().push(mesh_uniform);
self.instance_material_ids.get_mut().push(0);
self.instance_meshlet_counts_prefix_sum
self.instance_meshlet_counts
.get_mut()
.push(self.scene_cluster_count);
.push(meshlets_slice.len() as u32);
self.instance_meshlet_slice_starts
.get_mut()
.push(meshlets_slice.start);
self.scene_cluster_count += meshlets_slice.end - meshlets_slice.start;
self.scene_instance_count += 1;
self.scene_cluster_count += meshlets_slice.len() as u32;
}
/// Get the material ID for a [`crate::Material`].
@ -140,12 +145,13 @@ impl InstanceManager {
}
pub fn reset(&mut self, entities: &Entities) {
self.scene_instance_count = 0;
self.scene_cluster_count = 0;
self.instances.clear();
self.instance_uniforms.get_mut().clear();
self.instance_material_ids.get_mut().clear();
self.instance_meshlet_counts_prefix_sum.get_mut().clear();
self.instance_meshlet_counts.get_mut().clear();
self.instance_meshlet_slice_starts.get_mut().clear();
self.view_instance_visibility
.retain(|view_entity, _| entities.contains(*view_entity));
@ -227,7 +233,7 @@ pub fn extract_meshlet_mesh_entities(
// Add the instance's data to the instance manager
instance_manager.add_instance(
instance,
instance.into(),
meshlets_slice,
transform,
previous_transform,

View file

@ -51,15 +51,17 @@ struct DrawIndirectArgs {
const CENTIMETERS_PER_METER = 100.0;
#ifdef MESHLET_FILL_CLUSTER_BUFFERS_PASS
var<push_constant> cluster_count: u32;
@group(0) @binding(0) var<storage, read> meshlet_instance_meshlet_counts_prefix_sum: array<u32>; // Per entity instance
var<push_constant> scene_instance_count: u32;
@group(0) @binding(0) var<storage, read> meshlet_instance_meshlet_counts: array<u32>; // Per entity instance
@group(0) @binding(1) var<storage, read> meshlet_instance_meshlet_slice_starts: array<u32>; // Per entity instance
@group(0) @binding(2) var<storage, read_write> meshlet_cluster_instance_ids: array<u32>; // Per cluster
@group(0) @binding(3) var<storage, read_write> meshlet_cluster_meshlet_ids: array<u32>; // Per cluster
@group(0) @binding(4) var<storage, read_write> meshlet_global_cluster_count: atomic<u32>; // Single object shared between all workgroups
#endif
#ifdef MESHLET_CULLING_PASS
var<push_constant> meshlet_raster_cluster_rightmost_slot: u32;
struct Constants { scene_cluster_count: u32, meshlet_raster_cluster_rightmost_slot: u32 }
var<push_constant> constants: Constants;
@group(0) @binding(0) var<storage, read> meshlet_cluster_meshlet_ids: array<u32>; // Per cluster
@group(0) @binding(1) var<storage, read> meshlet_bounding_spheres: array<MeshletBoundingSpheres>; // Per meshlet
@group(0) @binding(2) var<storage, read> meshlet_simplification_errors: array<u32>; // Per meshlet
@ -67,9 +69,9 @@ var<push_constant> meshlet_raster_cluster_rightmost_slot: u32;
@group(0) @binding(4) var<storage, read> meshlet_instance_uniforms: array<Mesh>; // Per entity instance
@group(0) @binding(5) var<storage, read> meshlet_view_instance_visibility: array<u32>; // 1 bit per entity instance, packed as a bitmask
@group(0) @binding(6) var<storage, read_write> meshlet_second_pass_candidates: array<atomic<u32>>; // 1 bit per cluster , packed as a bitmask
@group(0) @binding(7) var<storage, read_write> meshlet_software_raster_indirect_args: DispatchIndirectArgs; // Single object shared between all workgroups/clusters/triangles
@group(0) @binding(8) var<storage, read_write> meshlet_hardware_raster_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/clusters/triangles
@group(0) @binding(9) var<storage, read_write> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups/clusters/triangles
@group(0) @binding(7) var<storage, read_write> meshlet_software_raster_indirect_args: DispatchIndirectArgs; // Single object shared between all workgroups
@group(0) @binding(8) var<storage, read_write> meshlet_hardware_raster_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups
@group(0) @binding(9) var<storage, read_write> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups
@group(0) @binding(10) var depth_pyramid: texture_2d<f32>; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass
@group(0) @binding(11) var<uniform> view: View;
@group(0) @binding(12) var<uniform> previous_view: PreviousViewUniforms;
@ -95,7 +97,7 @@ fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool {
@group(0) @binding(3) var<storage, read> meshlet_vertex_positions: array<u32>; // Many per meshlet
@group(0) @binding(4) var<storage, read> meshlet_cluster_instance_ids: array<u32>; // Per cluster
@group(0) @binding(5) var<storage, read> meshlet_instance_uniforms: array<Mesh>; // Per entity instance
@group(0) @binding(6) var<storage, read> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups/clusters/triangles
@group(0) @binding(6) var<storage, read> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups
@group(0) @binding(7) var<storage, read> meshlet_software_raster_cluster_count: u32;
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
@group(0) @binding(8) var<storage, read_write> meshlet_visibility_buffer: array<atomic<u64>>; // Per pixel

View file

@ -56,7 +56,7 @@ use self::{
},
visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode,
};
use crate::{graph::NodePbr, Material, MeshMaterial3d};
use crate::{graph::NodePbr, Material, MeshMaterial3d, PreviousGlobalTransform};
use bevy_app::{App, Plugin, PostUpdate};
use bevy_asset::{load_internal_asset, AssetApp, AssetId, Handle};
use bevy_core_pipeline::{
@ -129,6 +129,8 @@ pub struct MeshletPlugin {
/// If this number is too low, you'll see rendering artifacts like missing or blinking meshes.
///
/// Each cluster slot costs 4 bytes of VRAM.
///
/// Must not be greater than 2^25.
pub cluster_buffer_slots: u32,
}
@ -147,6 +149,11 @@ impl Plugin for MeshletPlugin {
#[cfg(target_endian = "big")]
compile_error!("MeshletPlugin is only supported on little-endian processors.");
if self.cluster_buffer_slots > 2_u32.pow(25) {
error!("MeshletPlugin::cluster_buffer_slots must not be greater than 2^25.");
std::process::exit(1);
}
load_internal_asset!(
app,
MESHLET_BINDINGS_SHADER_HANDLE,
@ -293,7 +300,7 @@ impl Plugin for MeshletPlugin {
/// The meshlet mesh equivalent of [`bevy_render::mesh::Mesh3d`].
#[derive(Component, Clone, Debug, Default, Deref, DerefMut, Reflect, PartialEq, Eq, From)]
#[reflect(Component, Default)]
#[require(Transform, Visibility)]
#[require(Transform, PreviousGlobalTransform, Visibility)]
pub struct MeshletMesh3d(pub Handle<MeshletMesh>);
impl From<MeshletMesh3d> for AssetId<MeshletMesh> {

View file

@ -84,7 +84,7 @@ impl FromWorld for MeshletPipelines {
layout: vec![cull_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..4,
range: 0..8,
}],
shader: MESHLET_CULLING_SHADER_HANDLE,
shader_defs: vec![
@ -99,7 +99,7 @@ impl FromWorld for MeshletPipelines {
layout: vec![cull_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..4,
range: 0..8,
}],
shader: MESHLET_CULLING_SHADER_HANDLE,
shader_defs: vec![
@ -441,7 +441,10 @@ impl FromWorld for MeshletPipelines {
pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor {
label: Some("meshlet_remap_1d_to_2d_dispatch_pipeline".into()),
layout: vec![layout],
push_constant_ranges: vec![],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..4,
}],
shader: MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE,
shader_defs: vec![],
entry_point: "remap_dispatch".into(),

View file

@ -8,13 +8,16 @@ struct DispatchIndirectArgs {
@group(0) @binding(0) var<storage, read_write> meshlet_software_raster_indirect_args: DispatchIndirectArgs;
@group(0) @binding(1) var<storage, read_write> meshlet_software_raster_cluster_count: u32;
var<push_constant> max_compute_workgroups_per_dimension: u32;
@compute
@workgroup_size(1, 1, 1)
fn remap_dispatch() {
meshlet_software_raster_cluster_count = meshlet_software_raster_indirect_args.x;
let n = u32(ceil(sqrt(f32(meshlet_software_raster_indirect_args.x))));
meshlet_software_raster_indirect_args.x = n;
meshlet_software_raster_indirect_args.y = n;
if meshlet_software_raster_cluster_count > max_compute_workgroups_per_dimension {
let n = u32(ceil(sqrt(f32(meshlet_software_raster_cluster_count))));
meshlet_software_raster_indirect_args.x = n;
meshlet_software_raster_indirect_args.y = n;
}
}

View file

@ -122,6 +122,7 @@ impl ResourceManager {
storage_buffer_read_only_sized(false, None),
storage_buffer_sized(false, None),
storage_buffer_sized(false, None),
storage_buffer_sized(false, None),
),
),
),
@ -246,6 +247,7 @@ impl ResourceManager {
#[derive(Component)]
pub struct MeshletViewResources {
pub scene_instance_count: u32,
pub scene_cluster_count: u32,
pub second_pass_candidates_buffer: Buffer,
instance_visibility: Buffer,
@ -330,7 +332,7 @@ pub fn prepare_meshlet_per_frame_resources(
&render_queue,
);
upload_storage_buffer(
&mut instance_manager.instance_meshlet_counts_prefix_sum,
&mut instance_manager.instance_meshlet_counts,
&render_device,
&render_queue,
);
@ -340,9 +342,6 @@ pub fn prepare_meshlet_per_frame_resources(
&render_queue,
);
// Early submission for GPU data uploads to start while the render graph records commands
render_queue.submit([]);
let needed_buffer_size = 4 * instance_manager.scene_cluster_count as u64;
match &mut resource_manager.cluster_instance_ids {
Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(),
@ -553,6 +552,7 @@ pub fn prepare_meshlet_per_frame_resources(
};
commands.entity(view_entity).insert(MeshletViewResources {
scene_instance_count: instance_manager.scene_instance_count,
scene_cluster_count: instance_manager.scene_cluster_count,
second_pass_candidates_buffer,
instance_visibility,
@ -602,19 +602,25 @@ pub fn prepare_meshlet_view_bind_groups(
let first_node = Arc::new(AtomicBool::new(true));
let fill_cluster_buffers_global_cluster_count =
render_device.create_buffer(&BufferDescriptor {
label: Some("meshlet_fill_cluster_buffers_global_cluster_count"),
size: 4,
usage: BufferUsages::STORAGE,
mapped_at_creation: false,
});
// TODO: Some of these bind groups can be reused across multiple views
for (view_entity, view_resources) in &views {
let entries = BindGroupEntries::sequential((
instance_manager
.instance_meshlet_counts_prefix_sum
.binding()
.unwrap(),
instance_manager.instance_meshlet_counts.binding().unwrap(),
instance_manager
.instance_meshlet_slice_starts
.binding()
.unwrap(),
cluster_instance_ids.as_entire_binding(),
cluster_meshlet_ids.as_entire_binding(),
fill_cluster_buffers_global_cluster_count.as_entire_binding(),
));
let fill_cluster_buffers = render_device.create_bind_group(
"meshlet_fill_cluster_buffers",

View file

@ -118,8 +118,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
render_context,
&meshlet_view_bind_groups.fill_cluster_buffers,
fill_cluster_buffers_pipeline,
thread_per_cluster_workgroups,
meshlet_view_resources.scene_cluster_count,
meshlet_view_resources.scene_instance_count,
);
}
cull_pass(
@ -130,6 +129,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
previous_view_offset,
culling_first_pipeline,
thread_per_cluster_workgroups,
meshlet_view_resources.scene_cluster_count,
meshlet_view_resources.raster_cluster_rightmost_slot,
meshlet_view_bind_groups
.remap_1d_to_2d_dispatch
@ -165,6 +165,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
previous_view_offset,
culling_second_pipeline,
thread_per_cluster_workgroups,
meshlet_view_resources.scene_cluster_count,
meshlet_view_resources.raster_cluster_rightmost_slot,
meshlet_view_bind_groups
.remap_1d_to_2d_dispatch
@ -253,6 +254,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
previous_view_offset,
culling_first_pipeline,
thread_per_cluster_workgroups,
meshlet_view_resources.scene_cluster_count,
meshlet_view_resources.raster_cluster_rightmost_slot,
meshlet_view_bind_groups
.remap_1d_to_2d_dispatch
@ -288,6 +290,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
previous_view_offset,
culling_second_pipeline,
thread_per_cluster_workgroups,
meshlet_view_resources.scene_cluster_count,
meshlet_view_resources.raster_cluster_rightmost_slot,
meshlet_view_bind_groups
.remap_1d_to_2d_dispatch
@ -334,21 +337,32 @@ fn fill_cluster_buffers_pass(
render_context: &mut RenderContext,
fill_cluster_buffers_bind_group: &BindGroup,
fill_cluster_buffers_pass_pipeline: &ComputePipeline,
fill_cluster_buffers_pass_workgroups: u32,
cluster_count: u32,
scene_instance_count: u32,
) {
let mut fill_cluster_buffers_pass_workgroups_x = scene_instance_count;
let mut fill_cluster_buffers_pass_workgroups_y = 1;
if scene_instance_count
> render_context
.render_device()
.limits()
.max_compute_workgroups_per_dimension
{
fill_cluster_buffers_pass_workgroups_x = (scene_instance_count as f32).sqrt().ceil() as u32;
fill_cluster_buffers_pass_workgroups_y = fill_cluster_buffers_pass_workgroups_x;
}
let command_encoder = render_context.command_encoder();
let mut fill_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
label: Some("fill_cluster_buffers"),
timestamp_writes: None,
});
fill_pass.set_pipeline(fill_cluster_buffers_pass_pipeline);
fill_pass.set_push_constants(0, &cluster_count.to_le_bytes());
fill_pass.set_push_constants(0, &scene_instance_count.to_le_bytes());
fill_pass.set_bind_group(0, fill_cluster_buffers_bind_group, &[]);
fill_pass.dispatch_workgroups(
fill_cluster_buffers_pass_workgroups,
fill_cluster_buffers_pass_workgroups,
fill_cluster_buffers_pass_workgroups,
fill_cluster_buffers_pass_workgroups_x,
fill_cluster_buffers_pass_workgroups_y,
1,
);
}
@ -361,17 +375,26 @@ fn cull_pass(
previous_view_offset: &PreviousViewUniformOffset,
culling_pipeline: &ComputePipeline,
culling_workgroups: u32,
scene_cluster_count: u32,
raster_cluster_rightmost_slot: u32,
remap_1d_to_2d_dispatch_bind_group: Option<&BindGroup>,
remap_1d_to_2d_dispatch_pipeline: Option<&ComputePipeline>,
) {
let max_compute_workgroups_per_dimension = render_context
.render_device()
.limits()
.max_compute_workgroups_per_dimension;
let command_encoder = render_context.command_encoder();
let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
label: Some(label),
timestamp_writes: None,
});
cull_pass.set_pipeline(culling_pipeline);
cull_pass.set_push_constants(0, &raster_cluster_rightmost_slot.to_le_bytes());
cull_pass.set_push_constants(
0,
bytemuck::cast_slice(&[scene_cluster_count, raster_cluster_rightmost_slot]),
);
cull_pass.set_bind_group(
0,
culling_bind_group,
@ -384,6 +407,7 @@ fn cull_pass(
remap_1d_to_2d_dispatch_bind_group,
) {
cull_pass.set_pipeline(remap_1d_to_2d_dispatch_pipeline);
cull_pass.set_push_constants(0, &max_compute_workgroups_per_dimension.to_be_bytes());
cull_pass.set_bind_group(0, remap_1d_to_2d_dispatch_bind_group, &[]);
cull_pass.dispatch_workgroups(1, 1, 1);
}

View file

@ -172,7 +172,7 @@ fn resolve_vertex_output(frag_coord: vec4<f32>) -> VertexOutput {
ddy_uv,
world_tangent,
instance_uniform.flags,
cluster_id,
instance_id ^ meshlet_id,
#ifdef PREPASS_FRAGMENT
#ifdef MOTION_VECTOR_PREPASS
motion_vector,

View file

@ -216,7 +216,7 @@ pub fn update_previous_view_data(
}
}
#[derive(Component)]
#[derive(Component, Default)]
pub struct PreviousGlobalTransform(pub Affine3A);
#[cfg(not(feature = "meshlet"))]