mirror of
https://github.com/bevyengine/bevy
synced 2024-11-10 07:04:33 +00:00
Meshlet single pass depth downsampling (SPD) (#13003)
# Objective - Using multiple raster passes to generate the depth pyramid is extremely slow - Pulling data from the source image is the largest bottleneck, it's important to sample in a cache-aware pattern - Barriers and pipeline drain between the raster passes is the second largest bottleneck - Each separate RenderPass on the CPU is _really_ expensive ## Solution - Port [FidelityFX SPD](https://gpuopen.com/fidelityfx-spd) to WGSL, replacing meshlet's existing multiple raster passes with a ~~single~~ two compute dispatches. Lack of coherent buffers means we have to do the the last 64x64 tile from mip 7+ in a separate dispatch to ensure the mip 6 writes were flushed :( - Workgroup shared memory version only at the moment, as the subgroup operation is blocked by our upgrade to wgpu 0.20 #13186 - Don't enforce a power-of-2 depth pyramid texture size, simply scaling by 0.5 is fine
This commit is contained in:
parent
b45786df41
commit
5536079945
5 changed files with 441 additions and 108 deletions
|
@ -1,16 +1,293 @@
|
|||
#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput
|
||||
@group(0) @binding(0) var mip_0: texture_depth_2d;
|
||||
@group(0) @binding(1) var mip_1: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(2) var mip_2: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(3) var mip_3: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(4) var mip_4: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(5) var mip_5: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(6) var mip_6: texture_storage_2d<r32float, read_write>;
|
||||
@group(0) @binding(7) var mip_7: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(8) var mip_8: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(9) var mip_9: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(10) var mip_10: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(11) var mip_11: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(12) var mip_12: texture_storage_2d<r32float, write>;
|
||||
@group(0) @binding(13) var samplr: sampler;
|
||||
var<push_constant> max_mip_level: u32;
|
||||
|
||||
@group(0) @binding(0) var input_depth: texture_2d<f32>;
|
||||
@group(0) @binding(1) var samplr: sampler;
|
||||
/// Generates a hierarchical depth buffer.
|
||||
/// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528
|
||||
|
||||
/// Performs a 2x2 downsample on a depth texture to generate the next mip level of a hierarchical depth buffer.
|
||||
var<workgroup> intermediate_memory: array<array<f32, 16>, 16>;
|
||||
|
||||
@fragment
|
||||
fn downsample_depth(in: FullscreenVertexOutput) -> @location(0) vec4<f32> {
|
||||
let depth_quad = textureGather(0, input_depth, samplr, in.uv);
|
||||
let downsampled_depth = min(
|
||||
min(depth_quad.x, depth_quad.y),
|
||||
min(depth_quad.z, depth_quad.w),
|
||||
);
|
||||
return vec4(downsampled_depth, 0.0, 0.0, 0.0);
|
||||
@compute
|
||||
@workgroup_size(256, 1, 1)
|
||||
fn downsample_depth_first(
|
||||
@builtin(num_workgroups) num_workgroups: vec3u,
|
||||
@builtin(workgroup_id) workgroup_id: vec3u,
|
||||
@builtin(local_invocation_index) local_invocation_index: u32,
|
||||
) {
|
||||
let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
|
||||
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
|
||||
let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
|
||||
|
||||
downsample_mips_0_and_1(x, y, workgroup_id.xy, local_invocation_index);
|
||||
|
||||
downsample_mips_2_to_5(x, y, workgroup_id.xy, local_invocation_index);
|
||||
}
|
||||
|
||||
@compute
|
||||
@workgroup_size(256, 1, 1)
|
||||
fn downsample_depth_second(@builtin(local_invocation_index) local_invocation_index: u32) {
|
||||
let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u);
|
||||
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u);
|
||||
let y = sub_xy.y + 8u * (local_invocation_index >> 7u);
|
||||
|
||||
downsample_mips_6_and_7(x, y);
|
||||
|
||||
downsample_mips_8_to_11(x, y, local_invocation_index);
|
||||
}
|
||||
|
||||
fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
|
||||
var v: vec4f;
|
||||
|
||||
var tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u);
|
||||
var pix = vec2(workgroup_id * 32u) + vec2(x, y);
|
||||
v[0] = reduce_load_mip_0(tex);
|
||||
textureStore(mip_1, pix, vec4(v[0]));
|
||||
|
||||
tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u);
|
||||
pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y);
|
||||
v[1] = reduce_load_mip_0(tex);
|
||||
textureStore(mip_1, pix, vec4(v[1]));
|
||||
|
||||
tex = vec2(workgroup_id * 64u) + vec2(x * 2u, y * 2u + 32u);
|
||||
pix = vec2(workgroup_id * 32u) + vec2(x, y + 16u);
|
||||
v[2] = reduce_load_mip_0(tex);
|
||||
textureStore(mip_1, pix, vec4(v[2]));
|
||||
|
||||
tex = vec2(workgroup_id * 64u) + vec2(x * 2u + 32u, y * 2u + 32u);
|
||||
pix = vec2(workgroup_id * 32u) + vec2(x + 16u, y + 16u);
|
||||
v[3] = reduce_load_mip_0(tex);
|
||||
textureStore(mip_1, pix, vec4(v[3]));
|
||||
|
||||
if max_mip_level <= 1u { return; }
|
||||
|
||||
for (var i = 0u; i < 4u; i++) {
|
||||
intermediate_memory[x][y] = v[i];
|
||||
workgroupBarrier();
|
||||
if local_invocation_index < 64u {
|
||||
v[i] = reduce_4(vec4(
|
||||
intermediate_memory[x * 2u + 0u][y * 2u + 0u],
|
||||
intermediate_memory[x * 2u + 1u][y * 2u + 0u],
|
||||
intermediate_memory[x * 2u + 0u][y * 2u + 1u],
|
||||
intermediate_memory[x * 2u + 1u][y * 2u + 1u],
|
||||
));
|
||||
pix = (workgroup_id * 16u) + vec2(
|
||||
x + (i % 2u) * 8u,
|
||||
y + (i / 2u) * 8u,
|
||||
);
|
||||
textureStore(mip_2, pix, vec4(v[i]));
|
||||
}
|
||||
workgroupBarrier();
|
||||
}
|
||||
|
||||
if local_invocation_index < 64u {
|
||||
intermediate_memory[x + 0u][y + 0u] = v[0];
|
||||
intermediate_memory[x + 8u][y + 0u] = v[1];
|
||||
intermediate_memory[x + 0u][y + 8u] = v[2];
|
||||
intermediate_memory[x + 8u][y + 8u] = v[3];
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mips_2_to_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
|
||||
if max_mip_level <= 2u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_2(x, y, workgroup_id, local_invocation_index);
|
||||
|
||||
if max_mip_level <= 3u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_3(x, y, workgroup_id, local_invocation_index);
|
||||
|
||||
if max_mip_level <= 4u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_4(x, y, workgroup_id, local_invocation_index);
|
||||
|
||||
if max_mip_level <= 5u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_5(workgroup_id, local_invocation_index);
|
||||
}
|
||||
|
||||
fn downsample_mip_2(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
|
||||
if local_invocation_index < 64u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[x * 2u + 0u][y * 2u + 0u],
|
||||
intermediate_memory[x * 2u + 1u][y * 2u + 0u],
|
||||
intermediate_memory[x * 2u + 0u][y * 2u + 1u],
|
||||
intermediate_memory[x * 2u + 1u][y * 2u + 1u],
|
||||
));
|
||||
textureStore(mip_3, (workgroup_id * 8u) + vec2(x, y), vec4(v));
|
||||
intermediate_memory[x * 2u + y % 2u][y * 2u] = v;
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mip_3(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
|
||||
if local_invocation_index < 16u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u],
|
||||
intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u],
|
||||
intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u],
|
||||
intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u],
|
||||
));
|
||||
textureStore(mip_4, (workgroup_id * 4u) + vec2(x, y), vec4(v));
|
||||
intermediate_memory[x * 4u + y][y * 4u] = v;
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mip_4(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) {
|
||||
if local_invocation_index < 4u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u],
|
||||
intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u],
|
||||
intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u],
|
||||
intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u],
|
||||
));
|
||||
textureStore(mip_5, (workgroup_id * 2u) + vec2(x, y), vec4(v));
|
||||
intermediate_memory[x + y * 2u][0u] = v;
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mip_5(workgroup_id: vec2u, local_invocation_index: u32) {
|
||||
if local_invocation_index < 1u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[0u][0u],
|
||||
intermediate_memory[1u][0u],
|
||||
intermediate_memory[2u][0u],
|
||||
intermediate_memory[3u][0u],
|
||||
));
|
||||
textureStore(mip_6, workgroup_id, vec4(v));
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mips_6_and_7(x: u32, y: u32) {
|
||||
var v: vec4f;
|
||||
|
||||
var tex = vec2(x * 4u + 0u, y * 4u + 0u);
|
||||
var pix = vec2(x * 2u + 0u, y * 2u + 0u);
|
||||
v[0] = reduce_load_mip_6(tex);
|
||||
textureStore(mip_7, pix, vec4(v[0]));
|
||||
|
||||
tex = vec2(x * 4u + 2u, y * 4u + 0u);
|
||||
pix = vec2(x * 2u + 1u, y * 2u + 0u);
|
||||
v[1] = reduce_load_mip_6(tex);
|
||||
textureStore(mip_7, pix, vec4(v[1]));
|
||||
|
||||
tex = vec2(x * 4u + 0u, y * 4u + 2u);
|
||||
pix = vec2(x * 2u + 0u, y * 2u + 1u);
|
||||
v[2] = reduce_load_mip_6(tex);
|
||||
textureStore(mip_7, pix, vec4(v[2]));
|
||||
|
||||
tex = vec2(x * 4u + 2u, y * 4u + 2u);
|
||||
pix = vec2(x * 2u + 1u, y * 2u + 1u);
|
||||
v[3] = reduce_load_mip_6(tex);
|
||||
textureStore(mip_7, pix, vec4(v[3]));
|
||||
|
||||
if max_mip_level <= 7u { return; }
|
||||
|
||||
let vr = reduce_4(v);
|
||||
textureStore(mip_8, vec2(x, y), vec4(vr));
|
||||
intermediate_memory[x][y] = vr;
|
||||
}
|
||||
|
||||
fn downsample_mips_8_to_11(x: u32, y: u32, local_invocation_index: u32) {
|
||||
if max_mip_level <= 8u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_8(x, y, local_invocation_index);
|
||||
|
||||
if max_mip_level <= 9u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_9(x, y, local_invocation_index);
|
||||
|
||||
if max_mip_level <= 10u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_10(x, y, local_invocation_index);
|
||||
|
||||
if max_mip_level <= 11u { return; }
|
||||
workgroupBarrier();
|
||||
downsample_mip_11(local_invocation_index);
|
||||
}
|
||||
|
||||
fn downsample_mip_8(x: u32, y: u32, local_invocation_index: u32) {
|
||||
if local_invocation_index < 64u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[x * 2u + 0u][y * 2u + 0u],
|
||||
intermediate_memory[x * 2u + 1u][y * 2u + 0u],
|
||||
intermediate_memory[x * 2u + 0u][y * 2u + 1u],
|
||||
intermediate_memory[x * 2u + 1u][y * 2u + 1u],
|
||||
));
|
||||
textureStore(mip_9, vec2(x, y), vec4(v));
|
||||
intermediate_memory[x * 2u + y % 2u][y * 2u] = v;
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mip_9(x: u32, y: u32, local_invocation_index: u32) {
|
||||
if local_invocation_index < 16u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u],
|
||||
intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u],
|
||||
intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u],
|
||||
intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u],
|
||||
));
|
||||
textureStore(mip_10, vec2(x, y), vec4(v));
|
||||
intermediate_memory[x * 4u + y][y * 4u] = v;
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mip_10(x: u32, y: u32, local_invocation_index: u32) {
|
||||
if local_invocation_index < 4u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u],
|
||||
intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u],
|
||||
intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u],
|
||||
intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u],
|
||||
));
|
||||
textureStore(mip_11, vec2(x, y), vec4(v));
|
||||
intermediate_memory[x + y * 2u][0u] = v;
|
||||
}
|
||||
}
|
||||
|
||||
fn downsample_mip_11(local_invocation_index: u32) {
|
||||
if local_invocation_index < 1u {
|
||||
let v = reduce_4(vec4(
|
||||
intermediate_memory[0u][0u],
|
||||
intermediate_memory[1u][0u],
|
||||
intermediate_memory[2u][0u],
|
||||
intermediate_memory[3u][0u],
|
||||
));
|
||||
textureStore(mip_12, vec2(0u, 0u), vec4(v));
|
||||
}
|
||||
}
|
||||
|
||||
fn remap_for_wave_reduction(a: u32) -> vec2u {
|
||||
return vec2(
|
||||
insertBits(extractBits(a, 2u, 3u), a, 0u, 1u),
|
||||
insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u),
|
||||
);
|
||||
}
|
||||
|
||||
fn reduce_load_mip_0(tex: vec2u) -> f32 {
|
||||
let uv = (vec2f(tex) + 0.5) / vec2f(textureDimensions(mip_0));
|
||||
return reduce_4(textureGather(mip_0, samplr, uv));
|
||||
}
|
||||
|
||||
fn reduce_load_mip_6(tex: vec2u) -> f32 {
|
||||
return reduce_4(vec4(
|
||||
textureLoad(mip_6, tex + vec2(0u, 0u)).r,
|
||||
textureLoad(mip_6, tex + vec2(0u, 1u)).r,
|
||||
textureLoad(mip_6, tex + vec2(1u, 0u)).r,
|
||||
textureLoad(mip_6, tex + vec2(1u, 1u)).r,
|
||||
));
|
||||
}
|
||||
|
||||
fn reduce_4(v: vec4f) -> f32 {
|
||||
return min(min(v.x, v.y), min(v.z, v.w));
|
||||
}
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
meshlet_cluster_meshlet_ids,
|
||||
}
|
||||
|
||||
/// Writes out instance_id and meshlet_id to the global buffers for each cluster in the scene.
|
||||
|
||||
@compute
|
||||
@workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 cluster per thread
|
||||
fn fill_cluster_buffers(
|
||||
|
|
|
@ -19,6 +19,7 @@ use bevy_ecs::{
|
|||
system::{Commands, Local, Query, Res, ResMut, Resource, SystemState},
|
||||
world::{FromWorld, World},
|
||||
};
|
||||
use bevy_math::{UVec2, Vec4Swizzles};
|
||||
use bevy_render::{
|
||||
render_resource::{binding_types::*, *},
|
||||
renderer::{RenderDevice, RenderQueue},
|
||||
|
@ -30,7 +31,7 @@ use bevy_transform::components::GlobalTransform;
|
|||
use bevy_utils::{default, HashMap, HashSet};
|
||||
use encase::internal::WriteInto;
|
||||
use std::{
|
||||
iter,
|
||||
array, iter,
|
||||
mem::size_of,
|
||||
ops::{DerefMut, Range},
|
||||
sync::{atomic::AtomicBool, Arc},
|
||||
|
@ -376,9 +377,8 @@ pub fn prepare_meshlet_per_frame_resources(
|
|||
});
|
||||
|
||||
let depth_pyramid_size = Extent3d {
|
||||
// Round down to the nearest power of 2 to ensure depth is conservative
|
||||
width: previous_power_of_2(view.viewport.z),
|
||||
height: previous_power_of_2(view.viewport.w),
|
||||
width: view.viewport.z.div_ceil(2),
|
||||
height: view.viewport.w.div_ceil(2),
|
||||
depth_or_array_layers: 1,
|
||||
};
|
||||
let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2);
|
||||
|
@ -391,24 +391,26 @@ pub fn prepare_meshlet_per_frame_resources(
|
|||
sample_count: 1,
|
||||
dimension: TextureDimension::D2,
|
||||
format: TextureFormat::R32Float,
|
||||
usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING,
|
||||
usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING,
|
||||
view_formats: &[],
|
||||
},
|
||||
);
|
||||
let depth_pyramid_mips = (0..depth_pyramid_mip_count)
|
||||
.map(|i| {
|
||||
let depth_pyramid_mips = array::from_fn(|i| {
|
||||
if (i as u32) < depth_pyramid_mip_count {
|
||||
depth_pyramid.texture.create_view(&TextureViewDescriptor {
|
||||
label: Some("meshlet_depth_pyramid_texture_view"),
|
||||
format: Some(TextureFormat::R32Float),
|
||||
dimension: Some(TextureViewDimension::D2),
|
||||
aspect: TextureAspect::All,
|
||||
base_mip_level: i,
|
||||
base_mip_level: i as u32,
|
||||
mip_level_count: Some(1),
|
||||
base_array_layer: 0,
|
||||
array_layer_count: Some(1),
|
||||
})
|
||||
})
|
||||
.collect::<Box<[TextureView]>>();
|
||||
} else {
|
||||
gpu_scene.depth_pyramid_dummy_texture.clone()
|
||||
}
|
||||
});
|
||||
let depth_pyramid_all_mips = depth_pyramid.default_view.clone();
|
||||
|
||||
let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) {
|
||||
|
@ -461,11 +463,13 @@ pub fn prepare_meshlet_per_frame_resources(
|
|||
visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(),
|
||||
depth_pyramid_all_mips,
|
||||
depth_pyramid_mips,
|
||||
depth_pyramid_mip_count,
|
||||
previous_depth_pyramid,
|
||||
material_depth_color: not_shadow_view
|
||||
.then(|| texture_cache.get(&render_device, material_depth_color)),
|
||||
material_depth: not_shadow_view
|
||||
.then(|| texture_cache.get(&render_device, material_depth)),
|
||||
view_size: view.viewport.zw(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -573,22 +577,26 @@ pub fn prepare_meshlet_view_bind_groups(
|
|||
(None, Some(shadow_view)) => &shadow_view.depth_attachment.view,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let downsample_depth = (0..view_resources.depth_pyramid_mips.len())
|
||||
.map(|i| {
|
||||
render_device.create_bind_group(
|
||||
"meshlet_downsample_depth_bind_group",
|
||||
&gpu_scene.downsample_depth_bind_group_layout,
|
||||
&BindGroupEntries::sequential((
|
||||
if i == 0 {
|
||||
view_depth_texture
|
||||
} else {
|
||||
&view_resources.depth_pyramid_mips[i - 1]
|
||||
},
|
||||
&gpu_scene.depth_pyramid_sampler,
|
||||
)),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
let downsample_depth = render_device.create_bind_group(
|
||||
"meshlet_downsample_depth_bind_group",
|
||||
&gpu_scene.downsample_depth_bind_group_layout,
|
||||
&BindGroupEntries::sequential((
|
||||
view_depth_texture,
|
||||
&view_resources.depth_pyramid_mips[0],
|
||||
&view_resources.depth_pyramid_mips[1],
|
||||
&view_resources.depth_pyramid_mips[2],
|
||||
&view_resources.depth_pyramid_mips[3],
|
||||
&view_resources.depth_pyramid_mips[4],
|
||||
&view_resources.depth_pyramid_mips[5],
|
||||
&view_resources.depth_pyramid_mips[6],
|
||||
&view_resources.depth_pyramid_mips[7],
|
||||
&view_resources.depth_pyramid_mips[8],
|
||||
&view_resources.depth_pyramid_mips[9],
|
||||
&view_resources.depth_pyramid_mips[10],
|
||||
&view_resources.depth_pyramid_mips[11],
|
||||
&gpu_scene.depth_pyramid_sampler,
|
||||
)),
|
||||
);
|
||||
|
||||
let entries = BindGroupEntries::sequential((
|
||||
cluster_meshlet_ids.as_entire_binding(),
|
||||
|
@ -698,6 +706,7 @@ pub struct MeshletGpuScene {
|
|||
copy_material_depth_bind_group_layout: BindGroupLayout,
|
||||
material_draw_bind_group_layout: BindGroupLayout,
|
||||
depth_pyramid_sampler: Sampler,
|
||||
depth_pyramid_dummy_texture: TextureView,
|
||||
}
|
||||
|
||||
impl FromWorld for MeshletGpuScene {
|
||||
|
@ -783,13 +792,30 @@ impl FromWorld for MeshletGpuScene {
|
|||
),
|
||||
downsample_depth_bind_group_layout: render_device.create_bind_group_layout(
|
||||
"meshlet_downsample_depth_bind_group_layout",
|
||||
&BindGroupLayoutEntries::sequential(
|
||||
ShaderStages::FRAGMENT,
|
||||
&BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, {
|
||||
let write_only_r32float = || {
|
||||
texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly)
|
||||
};
|
||||
(
|
||||
texture_2d(TextureSampleType::Float { filterable: false }),
|
||||
texture_depth_2d(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
texture_storage_2d(
|
||||
TextureFormat::R32Float,
|
||||
StorageTextureAccess::ReadWrite,
|
||||
),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
write_only_r32float(),
|
||||
sampler(SamplerBindingType::NonFiltering),
|
||||
),
|
||||
),
|
||||
)
|
||||
}),
|
||||
),
|
||||
visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout(
|
||||
"meshlet_visibility_buffer_raster_bind_group_layout",
|
||||
|
@ -836,6 +862,31 @@ impl FromWorld for MeshletGpuScene {
|
|||
label: Some("meshlet_depth_pyramid_sampler"),
|
||||
..default()
|
||||
}),
|
||||
depth_pyramid_dummy_texture: render_device
|
||||
.create_texture(&TextureDescriptor {
|
||||
label: Some("meshlet_depth_pyramid_dummy_texture"),
|
||||
size: Extent3d {
|
||||
width: 1,
|
||||
height: 1,
|
||||
depth_or_array_layers: 1,
|
||||
},
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: TextureDimension::D2,
|
||||
format: TextureFormat::R32Float,
|
||||
usage: TextureUsages::STORAGE_BINDING,
|
||||
view_formats: &[],
|
||||
})
|
||||
.create_view(&TextureViewDescriptor {
|
||||
label: Some("meshlet_depth_pyramid_dummy_texture_view"),
|
||||
format: Some(TextureFormat::R32Float),
|
||||
dimension: Some(TextureViewDimension::D2),
|
||||
aspect: TextureAspect::All,
|
||||
base_mip_level: 0,
|
||||
mip_level_count: Some(1),
|
||||
base_array_layer: 0,
|
||||
array_layer_count: Some(1),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -978,10 +1029,12 @@ pub struct MeshletViewResources {
|
|||
pub visibility_buffer_draw_indirect_args_second: Buffer,
|
||||
visibility_buffer_draw_triangle_buffer: Buffer,
|
||||
depth_pyramid_all_mips: TextureView,
|
||||
pub depth_pyramid_mips: Box<[TextureView]>,
|
||||
depth_pyramid_mips: [TextureView; 12],
|
||||
pub depth_pyramid_mip_count: u32,
|
||||
previous_depth_pyramid: TextureView,
|
||||
pub material_depth_color: Option<CachedTexture>,
|
||||
pub material_depth: Option<CachedTexture>,
|
||||
pub view_size: UVec2,
|
||||
}
|
||||
|
||||
#[derive(Component)]
|
||||
|
@ -990,18 +1043,8 @@ pub struct MeshletViewBindGroups {
|
|||
pub fill_cluster_buffers: BindGroup,
|
||||
pub culling_first: BindGroup,
|
||||
pub culling_second: BindGroup,
|
||||
pub downsample_depth: Box<[BindGroup]>,
|
||||
pub downsample_depth: BindGroup,
|
||||
pub visibility_buffer_raster: BindGroup,
|
||||
pub copy_material_depth: Option<BindGroup>,
|
||||
pub material_draw: Option<BindGroup>,
|
||||
}
|
||||
|
||||
fn previous_power_of_2(x: u32) -> u32 {
|
||||
// If x is a power of 2, halve it
|
||||
if x.count_ones() == 1 {
|
||||
x / 2
|
||||
} else {
|
||||
// Else calculate the largest power of 2 that is less than x
|
||||
1 << (31 - x.leading_zeros())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,8 @@ pub struct MeshletPipelines {
|
|||
fill_cluster_buffers: CachedComputePipelineId,
|
||||
cull_first: CachedComputePipelineId,
|
||||
cull_second: CachedComputePipelineId,
|
||||
downsample_depth: CachedRenderPipelineId,
|
||||
downsample_depth_first: CachedComputePipelineId,
|
||||
downsample_depth_second: CachedComputePipelineId,
|
||||
visibility_buffer_raster: CachedRenderPipelineId,
|
||||
visibility_buffer_raster_depth_only: CachedRenderPipelineId,
|
||||
visibility_buffer_raster_depth_only_clamp_ortho: CachedRenderPipelineId,
|
||||
|
@ -81,25 +82,33 @@ impl FromWorld for MeshletPipelines {
|
|||
entry_point: "cull_meshlets".into(),
|
||||
}),
|
||||
|
||||
downsample_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor {
|
||||
label: Some("meshlet_downsample_depth".into()),
|
||||
layout: vec![downsample_depth_layout],
|
||||
push_constant_ranges: vec![],
|
||||
vertex: fullscreen_shader_vertex_state(),
|
||||
primitive: PrimitiveState::default(),
|
||||
depth_stencil: None,
|
||||
multisample: MultisampleState::default(),
|
||||
fragment: Some(FragmentState {
|
||||
downsample_depth_first: pipeline_cache.queue_compute_pipeline(
|
||||
ComputePipelineDescriptor {
|
||||
label: Some("meshlet_downsample_depth_first_pipeline".into()),
|
||||
layout: vec![downsample_depth_layout.clone()],
|
||||
push_constant_ranges: vec![PushConstantRange {
|
||||
stages: ShaderStages::COMPUTE,
|
||||
range: 0..4,
|
||||
}],
|
||||
shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE,
|
||||
shader_defs: vec![],
|
||||
entry_point: "downsample_depth".into(),
|
||||
targets: vec![Some(ColorTargetState {
|
||||
format: TextureFormat::R32Float,
|
||||
blend: None,
|
||||
write_mask: ColorWrites::ALL,
|
||||
})],
|
||||
}),
|
||||
}),
|
||||
entry_point: "downsample_depth_first".into(),
|
||||
},
|
||||
),
|
||||
|
||||
downsample_depth_second: pipeline_cache.queue_compute_pipeline(
|
||||
ComputePipelineDescriptor {
|
||||
label: Some("meshlet_downsample_depth_second_pipeline".into()),
|
||||
layout: vec![downsample_depth_layout],
|
||||
push_constant_ranges: vec![PushConstantRange {
|
||||
stages: ShaderStages::COMPUTE,
|
||||
range: 0..4,
|
||||
}],
|
||||
shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE,
|
||||
shader_defs: vec![],
|
||||
entry_point: "downsample_depth_second".into(),
|
||||
},
|
||||
),
|
||||
|
||||
visibility_buffer_raster: pipeline_cache.queue_render_pipeline(
|
||||
RenderPipelineDescriptor {
|
||||
|
@ -233,7 +242,7 @@ impl FromWorld for MeshletPipelines {
|
|||
),
|
||||
|
||||
copy_material_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor {
|
||||
label: Some("meshlet_copy_material_depth".into()),
|
||||
label: Some("meshlet_copy_material_depth_pipeline".into()),
|
||||
layout: vec![copy_material_depth_layout],
|
||||
push_constant_ranges: vec![],
|
||||
vertex: fullscreen_shader_vertex_state(),
|
||||
|
@ -264,7 +273,8 @@ impl MeshletPipelines {
|
|||
&ComputePipeline,
|
||||
&ComputePipeline,
|
||||
&ComputePipeline,
|
||||
&RenderPipeline,
|
||||
&ComputePipeline,
|
||||
&ComputePipeline,
|
||||
&RenderPipeline,
|
||||
&RenderPipeline,
|
||||
&RenderPipeline,
|
||||
|
@ -276,7 +286,8 @@ impl MeshletPipelines {
|
|||
pipeline_cache.get_compute_pipeline(pipeline.fill_cluster_buffers)?,
|
||||
pipeline_cache.get_compute_pipeline(pipeline.cull_first)?,
|
||||
pipeline_cache.get_compute_pipeline(pipeline.cull_second)?,
|
||||
pipeline_cache.get_render_pipeline(pipeline.downsample_depth)?,
|
||||
pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first)?,
|
||||
pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second)?,
|
||||
pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?,
|
||||
pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?,
|
||||
pipeline_cache
|
||||
|
|
|
@ -54,6 +54,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
self.view_light_query.update_archetypes(world);
|
||||
}
|
||||
|
||||
// TODO: Reuse compute/render passes between logical passes where possible, as they're expensive
|
||||
fn run(
|
||||
&self,
|
||||
graph: &mut RenderGraphContext,
|
||||
|
@ -77,7 +78,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
fill_cluster_buffers_pipeline,
|
||||
culling_first_pipeline,
|
||||
culling_second_pipeline,
|
||||
downsample_depth_pipeline,
|
||||
downsample_depth_first_pipeline,
|
||||
downsample_depth_second_pipeline,
|
||||
visibility_buffer_raster_pipeline,
|
||||
visibility_buffer_raster_depth_only_pipeline,
|
||||
visibility_buffer_raster_depth_only_clamp_ortho,
|
||||
|
@ -137,7 +139,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
render_context,
|
||||
meshlet_view_resources,
|
||||
meshlet_view_bind_groups,
|
||||
downsample_depth_pipeline,
|
||||
downsample_depth_first_pipeline,
|
||||
downsample_depth_second_pipeline,
|
||||
);
|
||||
cull_pass(
|
||||
"culling_second",
|
||||
|
@ -170,7 +173,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
render_context,
|
||||
meshlet_view_resources,
|
||||
meshlet_view_bind_groups,
|
||||
downsample_depth_pipeline,
|
||||
downsample_depth_first_pipeline,
|
||||
downsample_depth_second_pipeline,
|
||||
);
|
||||
render_context.command_encoder().pop_debug_group();
|
||||
|
||||
|
@ -225,7 +229,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
render_context,
|
||||
meshlet_view_resources,
|
||||
meshlet_view_bind_groups,
|
||||
downsample_depth_pipeline,
|
||||
downsample_depth_first_pipeline,
|
||||
downsample_depth_second_pipeline,
|
||||
);
|
||||
cull_pass(
|
||||
"culling_second",
|
||||
|
@ -251,7 +256,8 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
render_context,
|
||||
meshlet_view_resources,
|
||||
meshlet_view_bind_groups,
|
||||
downsample_depth_pipeline,
|
||||
downsample_depth_first_pipeline,
|
||||
downsample_depth_second_pipeline,
|
||||
);
|
||||
render_context.command_encoder().pop_debug_group();
|
||||
}
|
||||
|
@ -260,7 +266,6 @@ impl Node for MeshletVisibilityBufferRasterPassNode {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Reuse same compute pass as cull_pass
|
||||
fn fill_cluster_buffers_pass(
|
||||
render_context: &mut RenderContext,
|
||||
fill_cluster_buffers_bind_group: &BindGroup,
|
||||
|
@ -380,35 +385,30 @@ fn downsample_depth(
|
|||
render_context: &mut RenderContext,
|
||||
meshlet_view_resources: &MeshletViewResources,
|
||||
meshlet_view_bind_groups: &MeshletViewBindGroups,
|
||||
downsample_depth_pipeline: &RenderPipeline,
|
||||
downsample_depth_first_pipeline: &ComputePipeline,
|
||||
downsample_depth_second_pipeline: &ComputePipeline,
|
||||
) {
|
||||
render_context
|
||||
.command_encoder()
|
||||
.push_debug_group("meshlet_downsample_depth");
|
||||
let command_encoder = render_context.command_encoder();
|
||||
let mut downsample_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
|
||||
label: Some("downsample_depth"),
|
||||
timestamp_writes: None,
|
||||
});
|
||||
downsample_pass.set_pipeline(downsample_depth_first_pipeline);
|
||||
downsample_pass.set_push_constants(
|
||||
0,
|
||||
&meshlet_view_resources.depth_pyramid_mip_count.to_le_bytes(),
|
||||
);
|
||||
downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth, &[]);
|
||||
downsample_pass.dispatch_workgroups(
|
||||
meshlet_view_resources.view_size.x.div_ceil(64),
|
||||
meshlet_view_resources.view_size.y.div_ceil(64),
|
||||
1,
|
||||
);
|
||||
|
||||
for i in 0..meshlet_view_resources.depth_pyramid_mips.len() {
|
||||
let downsample_pass = RenderPassDescriptor {
|
||||
label: Some("downsample_depth"),
|
||||
color_attachments: &[Some(RenderPassColorAttachment {
|
||||
view: &meshlet_view_resources.depth_pyramid_mips[i],
|
||||
resolve_target: None,
|
||||
ops: Operations {
|
||||
load: LoadOp::Clear(LinearRgba::BLACK.into()),
|
||||
store: StoreOp::Store,
|
||||
},
|
||||
})],
|
||||
depth_stencil_attachment: None,
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
};
|
||||
|
||||
let mut downsample_pass = render_context.begin_tracked_render_pass(downsample_pass);
|
||||
downsample_pass.set_render_pipeline(downsample_depth_pipeline);
|
||||
downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth[i], &[]);
|
||||
downsample_pass.draw(0..3, 0..1);
|
||||
if meshlet_view_resources.depth_pyramid_mip_count >= 7 {
|
||||
downsample_pass.set_pipeline(downsample_depth_second_pipeline);
|
||||
downsample_pass.dispatch_workgroups(1, 1, 1);
|
||||
}
|
||||
|
||||
render_context.command_encoder().pop_debug_group();
|
||||
}
|
||||
|
||||
fn copy_material_depth_pass(
|
||||
|
|
Loading…
Reference in a new issue