Meshlet single pass depth downsampling (SPD) (#13003)

# Objective
- Using multiple raster passes to generate the depth pyramid is
extremely slow
- Pulling data from the source image is the largest bottleneck, it's
important to sample in a cache-aware pattern
- Barriers and pipeline drain between the raster passes is the second
largest bottleneck
- Each separate RenderPass on the CPU is _really_ expensive

## Solution
- Port [FidelityFX SPD](https://gpuopen.com/fidelityfx-spd) to WGSL,
replacing meshlet's existing multiple raster passes with a ~~single~~
two compute dispatches. Lack of coherent buffers means we have to do the
the last 64x64 tile from mip 7+ in a separate dispatch to ensure the mip
6 writes were flushed :(
- Workgroup shared memory version only at the moment, as the subgroup
operation is blocked by our upgrade to wgpu 0.20 #13186
- Don't enforce a power-of-2 depth pyramid texture size, simply scaling
by 0.5 is fine
This commit is contained in:
JMS55 2024-06-03 05:41:14 -07:00 committed by GitHub
parent b45786df41
commit 5536079945
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 441 additions and 108 deletions

View file

@ -1,16 +1,293 @@
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput
@group(0) @binding(0) var mip_0: texture_depth_2d;
@group(0) @binding(1) var mip_1: texture_storage_2d<r32float, write>;
@group(0) @binding(2) var mip_2: texture_storage_2d<r32float, write>;
@group(0) @binding(3) var mip_3: texture_storage_2d<r32float, write>;
@group(0) @binding(4) var mip_4: texture_storage_2d<r32float, write>;
@group(0) @binding(5) var mip_5: texture_storage_2d<r32float, write>;
@group(0) @binding(6) var mip_6: texture_storage_2d<r32float, read_write>;
@group(0) @binding(7) var mip_7: texture_storage_2d<r32float, write>;
@group(0) @binding(8) var mip_8: texture_storage_2d<r32float, write>;
@group(0) @binding(9) var mip_9: texture_storage_2d<r32float, write>;
@group(0) @binding(10) var mip_10: texture_storage_2d<r32float, write>;
@group(0) @binding(11) var mip_11: texture_storage_2d<r32float, write>;
@group(0) @binding(12) var mip_12: texture_storage_2d<r32float, write>;
@group(0) @binding(13) var samplr: sampler;
var<push_constant> max_mip_level: u32;
@group(0) @binding(0) var input_depth: texture_2d<f32>;
@group(0) @binding(1) var samplr: sampler;
/// Generates a hierarchical depth buffer.
/// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528
/// Performs a 2x2 downsample on a depth texture to generate the next mip level of a hierarchical depth buffer.
var<workgroup> intermediate_memory: array<array<f32, 16>, 16>;
@fragment
fn downsample_depth(in: FullscreenVertexOutput) -> @location(0) vec4<f32> {
let depth_quad = textureGather(0, input_depth, samplr, in.uv);
let downsampled_depth = min(
min(depth_quad.x, depth_quad.y),
min(depth_quad.z, depth_quad.w),
);
return vec4(downsampled_depth, 0.0, 0.0, 0.0);
@compute
@workgroup_size(256, 1, 1)
fn downsample_depth_first(
@builtin(num_workgroups) num_workgroups: vec3u,
@builtin(workgroup_id) workgroup_id: vec3u,
@builtin(local_invocation_index) local_invocation_index: u32,
) {
let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
downsample_mips_0_and_1(x, y, workgroup_id.xy, local_invocation_index);
downsample_mips_2_to_5(x, y, workgroup_id.xy, local_invocation_index);
}
@compute
@workgroup_size(256, 1, 1)
fn downsample_depth_second(@builtin(local_invocation_index) local_invocation_index: u32) {
let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
downsample_mips_6_and_7(x, y);
downsample_mips_8_to_11(x, y, local_invocation_index);
}
fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
var v: vec4f;
var tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u);
var pix = vec2(workgroup_id * 32u) + vec2(x, y);
v[0] = reduce_load_mip_0(tex);
textureStore(mip_1, pix, vec4(v[0]));
tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u);
pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y);
v[1] = reduce_load_mip_0(tex);
textureStore(mip_1, pix, vec4(v[1]));
tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u + 32u);
pix = vec2(workgroup_id * 32u) + vec2(x, y + 16u);
v[2] = reduce_load_mip_0(tex);
textureStore(mip_1, pix, vec4(v[2]));
tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u + 32u);
pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y + 16u);
v[3] = reduce_load_mip_0(tex);
textureStore(mip_1, pix, vec4(v[3]));
if max_mip_level <= 1u { return; }
for (var i = 0u; i < 4u; i++) {
intermediate_memory[x][y] = v[i];
workgroupBarrier();
if local_invocation_index < 64u {
v[i] = reduce_4(vec4(
intermediate_memory[x * 2u + 0u][y * 2u + 0u],
intermediate_memory[x * 2u + 1u][y * 2u + 0u],
intermediate_memory[x * 2u + 0u][y * 2u + 1u],
intermediate_memory[x * 2u + 1u][y * 2u + 1u],
));
pix = (workgroup_id * 16u) + vec2(
x + (i % 2u) * 8u,
y + (i / 2u) * 8u,
);
textureStore(mip_2, pix, vec4(v[i]));
}
workgroupBarrier();
}
if local_invocation_index < 64u {
intermediate_memory[x + 0u][y + 0u] = v[0];
intermediate_memory[x + 8u][y + 0u] = v[1];
intermediate_memory[x + 0u][y + 8u] = v[2];
intermediate_memory[x + 8u][y + 8u] = v[3];
}
}
fn downsample_mips_2_to_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
if max_mip_level <= 2u { return; }
workgroupBarrier();
downsample_mip_2(x, y, workgroup_id, local_invocation_index);
if max_mip_level <= 3u { return; }
workgroupBarrier();
downsample_mip_3(x, y, workgroup_id, local_invocation_index);
if max_mip_level <= 4u { return; }
workgroupBarrier();
downsample_mip_4(x, y, workgroup_id, local_invocation_index);
if max_mip_level <= 5u { return; }
workgroupBarrier();
downsample_mip_5(workgroup_id, local_invocation_index);
}
fn downsample_mip_2(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
if local_invocation_index < 64u {
let v = reduce_4(vec4(
intermediate_memory[x * 2u + 0u][y * 2u + 0u],
intermediate_memory[x * 2u + 1u][y * 2u + 0u],
intermediate_memory[x * 2u + 0u][y * 2u + 1u],
intermediate_memory[x * 2u + 1u][y * 2u + 1u],
));
textureStore(mip_3, (workgroup_id * 8u) + vec2(x, y), vec4(v));
intermediate_memory[x * 2u + y % 2u][y * 2u] = v;
}
}
fn downsample_mip_3(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
if local_invocation_index < 16u {
let v = reduce_4(vec4(
intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u],
intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u],
intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u],
intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u],
));
textureStore(mip_4, (workgroup_id * 4u) + vec2(x, y), vec4(v));
intermediate_memory[x * 4u + y][y * 4u] = v;
}
}
fn downsample_mip_4(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
if local_invocation_index < 4u {
let v = reduce_4(vec4(
intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u],
intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u],
intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u],
intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u],
));
textureStore(mip_5, (workgroup_id * 2u) + vec2(x, y), vec4(v));
intermediate_memory[x + y * 2u][0u] = v;
}
}
fn downsample_mip_5(workgroup_id: vec2u, local_invocation_index: u32) {
if local_invocation_index < 1u {
let v = reduce_4(vec4(
intermediate_memory[0u][0u],
intermediate_memory[1u][0u],
intermediate_memory[2u][0u],
intermediate_memory[3u][0u],
));
textureStore(mip_6, workgroup_id, vec4(v));
}
}
fn downsample_mips_6_and_7(x: u32, y: u32) {
var v: vec4f;
var tex = vec2(x * 4u + 0u, y * 4u + 0u);
var pix = vec2(x * 2u + 0u, y * 2u + 0u);
v[0] = reduce_load_mip_6(tex);
textureStore(mip_7, pix, vec4(v[0]));
tex = vec2(x * 4u + 2u, y * 4u + 0u);
pix = vec2(x * 2u + 1u, y * 2u + 0u);
v[1] = reduce_load_mip_6(tex);
textureStore(mip_7, pix, vec4(v[1]));
tex = vec2(x * 4u + 0u, y * 4u + 2u);
pix = vec2(x * 2u + 0u, y * 2u + 1u);
v[2] = reduce_load_mip_6(tex);
textureStore(mip_7, pix, vec4(v[2]));
tex = vec2(x * 4u + 2u, y * 4u + 2u);
pix = vec2(x * 2u + 1u, y * 2u + 1u);
v[3] = reduce_load_mip_6(tex);
textureStore(mip_7, pix, vec4(v[3]));
if max_mip_level <= 7u { return; }
let vr = reduce_4(v);
textureStore(mip_8, vec2(x, y), vec4(vr));
intermediate_memory[x][y] = vr;
}
fn downsample_mips_8_to_11(x: u32, y: u32, local_invocation_index: u32) {
if max_mip_level <= 8u { return; }
workgroupBarrier();
downsample_mip_8(x, y, local_invocation_index);
if max_mip_level <= 9u { return; }
workgroupBarrier();
downsample_mip_9(x, y, local_invocation_index);
if max_mip_level <= 10u { return; }
workgroupBarrier();
downsample_mip_10(x, y, local_invocation_index);
if max_mip_level <= 11u { return; }
workgroupBarrier();
downsample_mip_11(local_invocation_index);
}
fn downsample_mip_8(x: u32, y: u32, local_invocation_index: u32) {
if local_invocation_index < 64u {
let v = reduce_4(vec4(
intermediate_memory[x * 2u + 0u][y * 2u + 0u],
intermediate_memory[x * 2u + 1u][y * 2u + 0u],
intermediate_memory[x * 2u + 0u][y * 2u + 1u],
intermediate_memory[x * 2u + 1u][y * 2u + 1u],
));
textureStore(mip_9, vec2(x, y), vec4(v));
intermediate_memory[x * 2u + y % 2u][y * 2u] = v;
}
}
fn downsample_mip_9(x: u32, y: u32, local_invocation_index: u32) {
if local_invocation_index < 16u {
let v = reduce_4(vec4(
intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u],
intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u],
intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u],
intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u],
));
textureStore(mip_10, vec2(x, y), vec4(v));
intermediate_memory[x * 4u + y][y * 4u] = v;
}
}
fn downsample_mip_10(x: u32, y: u32, local_invocation_index: u32) {
if local_invocation_index < 4u {
let v = reduce_4(vec4(
intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u],
intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u],
intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u],
intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u],
));
textureStore(mip_11, vec2(x, y), vec4(v));
intermediate_memory[x + y * 2u][0u] = v;
}
}
fn downsample_mip_11(local_invocation_index: u32) {
if local_invocation_index < 1u {
let v = reduce_4(vec4(
intermediate_memory[0u][0u],
intermediate_memory[1u][0u],
intermediate_memory[2u][0u],
intermediate_memory[3u][0u],
));
textureStore(mip_12, vec2(0u, 0u), vec4(v));
}
}
fn remap_for_wave_reduction(a: u32) -> vec2u {
return vec2(
insertBits(extractBits(a, 2u, 3u), a, 0u, 1u),
insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u),
);
}
fn reduce_load_mip_0(tex: vec2u) -> f32 {
let uv = (vec2f(tex) + 0.5) / vec2f(textureDimensions(mip_0));
return reduce_4(textureGather(mip_0, samplr, uv));
}
fn reduce_load_mip_6(tex: vec2u) -> f32 {
return reduce_4(vec4(
textureLoad(mip_6, tex + vec2(0u, 0u)).r,
textureLoad(mip_6, tex + vec2(0u, 1u)).r,
textureLoad(mip_6, tex + vec2(1u, 0u)).r,
textureLoad(mip_6, tex + vec2(1u, 1u)).r,
));
}
fn reduce_4(v: vec4f) -> f32 {
return min(min(v.x, v.y), min(v.z, v.w));
}

View file

@ -6,6 +6,8 @@
meshlet_cluster_meshlet_ids,
}
/// Writes out instance_id and meshlet_id to the global buffers for each cluster in the scene.
@compute
@workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 cluster per thread
fn fill_cluster_buffers(

View file

@ -19,6 +19,7 @@ use bevy_ecs::{
system::{Commands, Local, Query, Res, ResMut, Resource, SystemState},
world::{FromWorld, World},
};
use bevy_math::{UVec2, Vec4Swizzles};
use bevy_render::{
render_resource::{binding_types::*, *},
renderer::{RenderDevice, RenderQueue},
@ -30,7 +31,7 @@ use bevy_transform::components::GlobalTransform;
use bevy_utils::{default, HashMap, HashSet};
use encase::internal::WriteInto;
use std::{
iter,
array, iter,
mem::size_of,
ops::{DerefMut, Range},
sync::{atomic::AtomicBool, Arc},
@ -376,9 +377,8 @@ pub fn prepare_meshlet_per_frame_resources(
});
let depth_pyramid_size = Extent3d {
// Round down to the nearest power of 2 to ensure depth is conservative
width: previous_power_of_2(view.viewport.z),
height: previous_power_of_2(view.viewport.w),
width: view.viewport.z.div_ceil(2),
height: view.viewport.w.div_ceil(2),
depth_or_array_layers: 1,
};
let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2);
@ -391,24 +391,26 @@ pub fn prepare_meshlet_per_frame_resources(
sample_count: 1,
dimension: TextureDimension::D2,
format: TextureFormat::R32Float,
usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING,
usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING,
view_formats: &[],
},
);
let depth_pyramid_mips = (0..depth_pyramid_mip_count)
.map(|i| {
let depth_pyramid_mips = array::from_fn(|i| {
if (i as u32) < depth_pyramid_mip_count {
depth_pyramid.texture.create_view(&TextureViewDescriptor {
label: Some("meshlet_depth_pyramid_texture_view"),
format: Some(TextureFormat::R32Float),
dimension: Some(TextureViewDimension::D2),
aspect: TextureAspect::All,
base_mip_level: i,
base_mip_level: i as u32,
mip_level_count: Some(1),
base_array_layer: 0,
array_layer_count: Some(1),
})
})
.collect::<Box<[TextureView]>>();
} else {
gpu_scene.depth_pyramid_dummy_texture.clone()
}
});
let depth_pyramid_all_mips = depth_pyramid.default_view.clone();
let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) {
@ -461,11 +463,13 @@ pub fn prepare_meshlet_per_frame_resources(
visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(),
depth_pyramid_all_mips,
depth_pyramid_mips,
depth_pyramid_mip_count,
previous_depth_pyramid,
material_depth_color: not_shadow_view
.then(|| texture_cache.get(&render_device, material_depth_color)),
material_depth: not_shadow_view
.then(|| texture_cache.get(&render_device, material_depth)),
view_size: view.viewport.zw(),
});
}
}
@ -573,22 +577,26 @@ pub fn prepare_meshlet_view_bind_groups(
(None, Some(shadow_view)) => &shadow_view.depth_attachment.view,
_ => unreachable!(),
};
let downsample_depth = (0..view_resources.depth_pyramid_mips.len())
.map(|i| {
render_device.create_bind_group(
"meshlet_downsample_depth_bind_group",
&gpu_scene.downsample_depth_bind_group_layout,
&BindGroupEntries::sequential((
if i == 0 {
view_depth_texture
} else {
&view_resources.depth_pyramid_mips[i - 1]
},
&gpu_scene.depth_pyramid_sampler,
)),
)
})
.collect();
let downsample_depth = render_device.create_bind_group(
"meshlet_downsample_depth_bind_group",
&gpu_scene.downsample_depth_bind_group_layout,
&BindGroupEntries::sequential((
view_depth_texture,
&view_resources.depth_pyramid_mips[0],
&view_resources.depth_pyramid_mips[1],
&view_resources.depth_pyramid_mips[2],
&view_resources.depth_pyramid_mips[3],
&view_resources.depth_pyramid_mips[4],
&view_resources.depth_pyramid_mips[5],
&view_resources.depth_pyramid_mips[6],
&view_resources.depth_pyramid_mips[7],
&view_resources.depth_pyramid_mips[8],
&view_resources.depth_pyramid_mips[9],
&view_resources.depth_pyramid_mips[10],
&view_resources.depth_pyramid_mips[11],
&gpu_scene.depth_pyramid_sampler,
)),
);
let entries = BindGroupEntries::sequential((
cluster_meshlet_ids.as_entire_binding(),
@ -698,6 +706,7 @@ pub struct MeshletGpuScene {
copy_material_depth_bind_group_layout: BindGroupLayout,
material_draw_bind_group_layout: BindGroupLayout,
depth_pyramid_sampler: Sampler,
depth_pyramid_dummy_texture: TextureView,
}
impl FromWorld for MeshletGpuScene {
@ -783,13 +792,30 @@ impl FromWorld for MeshletGpuScene {
),
downsample_depth_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_downsample_depth_bind_group_layout",
&BindGroupLayoutEntries::sequential(
ShaderStages::FRAGMENT,
&BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, {
let write_only_r32float = || {
texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly)
};
(
texture_2d(TextureSampleType::Float { filterable: false }),
texture_depth_2d(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
texture_storage_2d(
TextureFormat::R32Float,
StorageTextureAccess::ReadWrite,
),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
write_only_r32float(),
sampler(SamplerBindingType::NonFiltering),
),
),
)
}),
),
visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout(
"meshlet_visibility_buffer_raster_bind_group_layout",
@ -836,6 +862,31 @@ impl FromWorld for MeshletGpuScene {
label: Some("meshlet_depth_pyramid_sampler"),
..default()
}),
depth_pyramid_dummy_texture: render_device
.create_texture(&TextureDescriptor {
label: Some("meshlet_depth_pyramid_dummy_texture"),
size: Extent3d {
width: 1,
height: 1,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: TextureDimension::D2,
format: TextureFormat::R32Float,
usage: TextureUsages::STORAGE_BINDING,
view_formats: &[],
})
.create_view(&TextureViewDescriptor {
label: Some("meshlet_depth_pyramid_dummy_texture_view"),
format: Some(TextureFormat::R32Float),
dimension: Some(TextureViewDimension::D2),
aspect: TextureAspect::All,
base_mip_level: 0,
mip_level_count: Some(1),
base_array_layer: 0,
array_layer_count: Some(1),
}),
}
}
}
@ -978,10 +1029,12 @@ pub struct MeshletViewResources {
pub visibility_buffer_draw_indirect_args_second: Buffer,
visibility_buffer_draw_triangle_buffer: Buffer,
depth_pyramid_all_mips: TextureView,
pub depth_pyramid_mips: Box<[TextureView]>,
depth_pyramid_mips: [TextureView; 12],
pub depth_pyramid_mip_count: u32,
previous_depth_pyramid: TextureView,
pub material_depth_color: Option<CachedTexture>,
pub material_depth: Option<CachedTexture>,
pub view_size: UVec2,
}
#[derive(Component)]
@ -990,18 +1043,8 @@ pub struct MeshletViewBindGroups {
pub fill_cluster_buffers: BindGroup,
pub culling_first: BindGroup,
pub culling_second: BindGroup,
pub downsample_depth: Box<[BindGroup]>,
pub downsample_depth: BindGroup,
pub visibility_buffer_raster: BindGroup,
pub copy_material_depth: Option<BindGroup>,
pub material_draw: Option<BindGroup>,
}
fn previous_power_of_2(x: u32) -> u32 {
// If x is a power of 2, halve it
if x.count_ones() == 1 {
x / 2
} else {
// Else calculate the largest power of 2 that is less than x
1 << (31 - x.leading_zeros())
}
}

View file

@ -24,7 +24,8 @@ pub struct MeshletPipelines {
fill_cluster_buffers: CachedComputePipelineId,
cull_first: CachedComputePipelineId,
cull_second: CachedComputePipelineId,
downsample_depth: CachedRenderPipelineId,
downsample_depth_first: CachedComputePipelineId,
downsample_depth_second: CachedComputePipelineId,
visibility_buffer_raster: CachedRenderPipelineId,
visibility_buffer_raster_depth_only: CachedRenderPipelineId,
visibility_buffer_raster_depth_only_clamp_ortho: CachedRenderPipelineId,
@ -81,25 +82,33 @@ impl FromWorld for MeshletPipelines {
entry_point: "cull_meshlets".into(),
}),
downsample_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor {
label: Some("meshlet_downsample_depth".into()),
layout: vec![downsample_depth_layout],
push_constant_ranges: vec![],
vertex: fullscreen_shader_vertex_state(),
primitive: PrimitiveState::default(),
depth_stencil: None,
multisample: MultisampleState::default(),
fragment: Some(FragmentState {
downsample_depth_first: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_downsample_depth_first_pipeline".into()),
layout: vec![downsample_depth_layout.clone()],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..4,
}],
shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE,
shader_defs: vec![],
entry_point: "downsample_depth".into(),
targets: vec![Some(ColorTargetState {
format: TextureFormat::R32Float,
blend: None,
write_mask: ColorWrites::ALL,
})],
}),
}),
entry_point: "downsample_depth_first".into(),
},
),
downsample_depth_second: pipeline_cache.queue_compute_pipeline(
ComputePipelineDescriptor {
label: Some("meshlet_downsample_depth_second_pipeline".into()),
layout: vec![downsample_depth_layout],
push_constant_ranges: vec![PushConstantRange {
stages: ShaderStages::COMPUTE,
range: 0..4,
}],
shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE,
shader_defs: vec![],
entry_point: "downsample_depth_second".into(),
},
),
visibility_buffer_raster: pipeline_cache.queue_render_pipeline(
RenderPipelineDescriptor {
@ -233,7 +242,7 @@ impl FromWorld for MeshletPipelines {
),
copy_material_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor {
label: Some("meshlet_copy_material_depth".into()),
label: Some("meshlet_copy_material_depth_pipeline".into()),
layout: vec![copy_material_depth_layout],
push_constant_ranges: vec![],
vertex: fullscreen_shader_vertex_state(),
@ -264,7 +273,8 @@ impl MeshletPipelines {
&ComputePipeline,
&ComputePipeline,
&ComputePipeline,
&RenderPipeline,
&ComputePipeline,
&ComputePipeline,
&RenderPipeline,
&RenderPipeline,
&RenderPipeline,
@ -276,7 +286,8 @@ impl MeshletPipelines {
pipeline_cache.get_compute_pipeline(pipeline.fill_cluster_buffers)?,
pipeline_cache.get_compute_pipeline(pipeline.cull_first)?,
pipeline_cache.get_compute_pipeline(pipeline.cull_second)?,
pipeline_cache.get_render_pipeline(pipeline.downsample_depth)?,
pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first)?,
pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second)?,
pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?,
pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?,
pipeline_cache

View file

@ -54,6 +54,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
self.view_light_query.update_archetypes(world);
}
// TODO: Reuse compute/render passes between logical passes where possible, as they're expensive
fn run(
&self,
graph: &mut RenderGraphContext,
@ -77,7 +78,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
fill_cluster_buffers_pipeline,
culling_first_pipeline,
culling_second_pipeline,
downsample_depth_pipeline,
downsample_depth_first_pipeline,
downsample_depth_second_pipeline,
visibility_buffer_raster_pipeline,
visibility_buffer_raster_depth_only_pipeline,
visibility_buffer_raster_depth_only_clamp_ortho,
@ -137,7 +139,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
render_context,
meshlet_view_resources,
meshlet_view_bind_groups,
downsample_depth_pipeline,
downsample_depth_first_pipeline,
downsample_depth_second_pipeline,
);
cull_pass(
"culling_second",
@ -170,7 +173,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
render_context,
meshlet_view_resources,
meshlet_view_bind_groups,
downsample_depth_pipeline,
downsample_depth_first_pipeline,
downsample_depth_second_pipeline,
);
render_context.command_encoder().pop_debug_group();
@ -225,7 +229,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
render_context,
meshlet_view_resources,
meshlet_view_bind_groups,
downsample_depth_pipeline,
downsample_depth_first_pipeline,
downsample_depth_second_pipeline,
);
cull_pass(
"culling_second",
@ -251,7 +256,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
render_context,
meshlet_view_resources,
meshlet_view_bind_groups,
downsample_depth_pipeline,
downsample_depth_first_pipeline,
downsample_depth_second_pipeline,
);
render_context.command_encoder().pop_debug_group();
}
@ -260,7 +266,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
}
}
// TODO: Reuse same compute pass as cull_pass
fn fill_cluster_buffers_pass(
render_context: &mut RenderContext,
fill_cluster_buffers_bind_group: &BindGroup,
@ -380,35 +385,30 @@ fn downsample_depth(
render_context: &mut RenderContext,
meshlet_view_resources: &MeshletViewResources,
meshlet_view_bind_groups: &MeshletViewBindGroups,
downsample_depth_pipeline: &RenderPipeline,
downsample_depth_first_pipeline: &ComputePipeline,
downsample_depth_second_pipeline: &ComputePipeline,
) {
render_context
.command_encoder()
.push_debug_group("meshlet_downsample_depth");
let command_encoder = render_context.command_encoder();
let mut downsample_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
label: Some("downsample_depth"),
timestamp_writes: None,
});
downsample_pass.set_pipeline(downsample_depth_first_pipeline);
downsample_pass.set_push_constants(
0,
&meshlet_view_resources.depth_pyramid_mip_count.to_le_bytes(),
);
downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth, &[]);
downsample_pass.dispatch_workgroups(
meshlet_view_resources.view_size.x.div_ceil(64),
meshlet_view_resources.view_size.y.div_ceil(64),
1,
);
for i in 0..meshlet_view_resources.depth_pyramid_mips.len() {
let downsample_pass = RenderPassDescriptor {
label: Some("downsample_depth"),
color_attachments: &[Some(RenderPassColorAttachment {
view: &meshlet_view_resources.depth_pyramid_mips[i],
resolve_target: None,
ops: Operations {
load: LoadOp::Clear(LinearRgba::BLACK.into()),
store: StoreOp::Store,
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
};
let mut downsample_pass = render_context.begin_tracked_render_pass(downsample_pass);
downsample_pass.set_render_pipeline(downsample_depth_pipeline);
downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth[i], &[]);
downsample_pass.draw(0..3, 0..1);
if meshlet_view_resources.depth_pyramid_mip_count >= 7 {
downsample_pass.set_pipeline(downsample_depth_second_pipeline);
downsample_pass.dispatch_workgroups(1, 1, 1);
}
render_context.command_encoder().pop_debug_group();
}
fn copy_material_depth_pass(