bevy/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
Patrick Walton d235d41af1
Fix the example regressions from packed growable buffers. (#14375)
The "uberbuffers" PR #14257 caused some examples to fail intermittently
for different reasons:

1. `morph_targets` could fail because vertex displacements for morph
targets are keyed off the vertex index. With buffer packing, the vertex
index can vary based on the position in the buffer, which caused the
morph targets to be potentially incorrect. The solution is to include
the first vertex index with the `MeshUniform` (and `MeshInputUniform` if
GPU preprocessing is in use), so that the shader can calculate the true
vertex index before performing the morph operation. This results in
wasted space in `MeshUniform`, which is unfortunate, but we'll soon be
filling in the padding with the ID of the material when bindless
textures land, so this had to happen sooner or later anyhow.

Including the vertex index in the `MeshInputUniform` caused an ordering
problem. The `MeshInputUniform` was created during the extraction phase,
before the allocations occurred, so the extraction logic didn't know
where the mesh vertex data was going to end up. The solution is to move
the `MeshInputUniform` creation (the `collect_meshes_for_gpu_building`
system) to after the allocations phase. This should be better for
parallelism anyhow, because it allows the extraction phase to finish
quicker. It's also something we'll have to do for bindless in any event.

2. The `lines` and `fog_volumes` examples could fail because their
custom drawing nodes weren't updated to supply the vertex and index
offsets in their `draw_indexed` and `draw` calls. This commit fixes this
oversight.

Fixes #14366.
2024-07-22 18:55:51 +00:00

194 lines
7.2 KiB
WebGPU Shading Language

// GPU mesh uniform building.
//
// This is a compute shader that expands each `MeshInputUniform` out to a full
// `MeshUniform` for each view before rendering. (Thus `MeshInputUniform`
// and `MeshUniform` are in a 1:N relationship.) In this file those two types
// appear as the `MeshInput` struct and the imported `Mesh` struct,
// respectively. The shader runs in parallel for all meshes for all views. As
// part of this process, the shader gathers each mesh's transform on the
// previous frame and writes it into the `MeshUniform` so that TAA works.
#import bevy_pbr::mesh_types::Mesh
#import bevy_render::maths
#import bevy_render::view::View
// Per-frame data that the CPU supplies to the GPU.
//
// `main` reads one `MeshInput` per work item and expands it into a full `Mesh`
// in the `output` buffer.
struct MeshInput {
// The model transform.
//
// NOTE(review): `main` binds this value to a variable named
// `world_from_local_affine_transpose` and converts it with
// `maths::affine3_to_square`, so it is presumably stored as the transpose of
// the 3x4 affine matrix -- confirm against the CPU side.
world_from_local: mat3x4<f32>,
// The lightmap UV rect, packed into 64 bits.
lightmap_uv_rect: vec2<u32>,
// Various flags.
flags: u32,
// The index of this mesh's `MeshInput` in the `previous_input` array, if
// applicable. If not present, this is `u32::MAX` (`0xffffffff`), in which case
// `main` reuses the current frame's transform as the previous transform.
previous_input_index: u32,
// The index of the mesh's first vertex. `main` copies it unchanged into the
// output `Mesh` so that later shaders can recover the true vertex index.
first_vertex_index: u32,
// Padding. These three fields are never read in this shader.
// NOTE(review): presumably present to round the struct size up for alignment
// -- confirm against the matching CPU-side struct.
pad_a: u32,
pad_b: u32,
pad_c: u32,
}
// Information about each mesh instance needed to cull it on GPU.
//
// At the moment, this just consists of its axis-aligned bounding box (AABB).
//
// `main` indexes the `mesh_culling_data` array with the same `input_index` it
// uses for `current_input`, and only reads the `xyz` components of each field.
struct MeshCullingData {
// The 3D center of the AABB in model space, padded with an extra unused
// float value.
aabb_center: vec4<f32>,
// The 3D extents of the AABB in model space, divided by two, padded with
// an extra unused float value.
aabb_half_extents: vec4<f32>,
}
// One invocation of this compute shader: i.e. one mesh instance in a view.
//
// `main` selects its work item with `global_invocation_id.x`; invocations
// whose index is past the end of the `work_items` array exit immediately.
struct PreprocessWorkItem {
// The index of the `MeshInput` in the `current_input` buffer that we read
// from. When frustum culling is enabled, the same index is used to look up the
// `MeshCullingData` for the mesh.
input_index: u32,
// In direct mode, the index of the `Mesh` in `output` that we write to. In
// indirect mode, the index of the `IndirectParameters` in
// `indirect_parameters` that we write to.
output_index: u32,
}
// The `wgpu` indirect parameters structure. This is a union of two structures.
// For more information, see the corresponding comment in
// `gpu_preprocessing.rs`.
//
// This shader only touches `instance_count` and `instance_index` (see the
// `INDIRECT` branch of `main`); the other fields are never accessed here.
struct IndirectParameters {
// `vertex_count` or `index_count`.
data0: u32,
// `instance_count` in both structures. `main` atomically increments this by
// one for every instance it writes to `output`.
instance_count: atomic<u32>,
// `first_vertex` in both structures.
// NOTE(review): for indexed draws this slot is presumably `first_index`
// rather than `first_vertex` -- confirm against `gpu_preprocessing.rs`.
first_vertex: u32,
// `first_instance` or `base_vertex`.
data1: u32,
// The base slot in `output` for this draw. `main` only reads this field: it
// adds the pre-increment value of `instance_count` to it to choose where to
// write the `Mesh`.
// NOTE(review): presumably this mirrors the draw's first instance index --
// confirm against `gpu_preprocessing.rs`.
instance_index: u32,
}
// The current frame's `MeshInput`.
//
// Indexed by `PreprocessWorkItem::input_index`.
@group(0) @binding(0) var<storage> current_input: array<MeshInput>;
// The `MeshInput` values from the previous frame.
//
// Indexed by `MeshInput::previous_input_index`; only the `world_from_local`
// field is read from it.
@group(0) @binding(1) var<storage> previous_input: array<MeshInput>;
// Indices into the `MeshInput` buffer.
//
// There may be many indices that map to the same `MeshInput`.
// The length of this array bounds the number of useful invocations of `main`.
@group(0) @binding(2) var<storage> work_items: array<PreprocessWorkItem>;
// The output array of `Mesh`es.
//
// This is the only buffer that `main` writes whole mesh records to.
@group(0) @binding(3) var<storage, read_write> output: array<Mesh>;
#ifdef INDIRECT
// The array of indirect parameters for drawcalls.
//
// Bound read-write because `main` atomically bumps `instance_count`.
@group(0) @binding(4) var<storage, read_write> indirect_parameters: array<IndirectParameters>;
#endif
#ifdef FRUSTUM_CULLING
// Data needed to cull the meshes.
//
// At the moment, this consists only of AABBs.
// Indexed by `PreprocessWorkItem::input_index`, in parallel with
// `current_input`.
@group(0) @binding(5) var<storage> mesh_culling_data: array<MeshCullingData>;
// The view data, including the view matrix.
//
// Only `view.frustum` is read by this shader.
@group(0) @binding(6) var<uniform> view: View;
// Tests an oriented bounding box (OBB) against the planes of the view frustum.
//
// * `world_from_local`: the mesh's model matrix; its first three columns give
//   the box's axes.
// * `aabb_center`: the center of the box, already transformed by
//   `world_from_local` by the caller. `aabb_center.w` should be 1.0.
// * `aabb_half_extents`: the model-space half extents of the box.
//
// Returns false as soon as one plane rejects the box, and true if no tested
// plane rejects it.
//
// NOTE(review): only `view.frustum[0]` through `view.frustum[4]` are tested;
// presumably the remaining (far) plane is skipped on purpose -- confirm.
fn view_frustum_intersects_obb(
    world_from_local: mat4x4<f32>,
    aabb_center: vec4<f32>,
    aabb_half_extents: vec3<f32>,
) -> bool {
    for (var plane_index = 0; plane_index < 5; plane_index += 1) {
        let plane = view.frustum[plane_index];

        // Project each box axis onto the plane, then weight the absolute
        // projections by the half extents. The result is the radius of the
        // sphere that stands in for the box with respect to this plane.
        let axis_projections = abs(vec3(
            dot(plane, world_from_local[0]),
            dot(plane, world_from_local[1]),
            dot(plane, world_from_local[2]),
        ));
        let relative_radius = dot(axis_projections, aabb_half_extents);

        // A single rejecting plane is enough to cull the box.
        let touches_half_space = maths::sphere_intersects_plane_half_space(
            plane, aabb_center, relative_radius);
        if (!touches_half_space) {
            return false;
        }
    }

    return true;
}
#endif
// Compute entry point: one invocation per `PreprocessWorkItem`.
//
// Each invocation reads its `MeshInput`, optionally frustum-culls the mesh
// (`FRUSTUM_CULLING`), picks an output slot (directly, or by bumping the
// instance count of the draw's indirect parameters when `INDIRECT` is set),
// and writes the expanded `Mesh` to `output`.
@compute
@workgroup_size(64)
fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
    // The dispatch may be larger than the number of work items; surplus
    // invocations have nothing to do.
    let work_item_index = global_invocation_id.x;
    if (work_item_index >= arrayLength(&work_items)) {
        return;
    }

    // Fetch the work item and the transform of the mesh it refers to.
    let input_index = work_items[work_item_index].input_index;
    let output_index = work_items[work_item_index].output_index;
    let world_from_local_affine_transpose = current_input[input_index].world_from_local;
    let world_from_local = maths::affine3_to_square(world_from_local_affine_transpose);

#ifdef FRUSTUM_CULLING
    // Transform the model-space AABB center by the model matrix and test the
    // resulting OBB against the frustum. Culled meshes write nothing and, in
    // indirect mode, do not bump the instance count.
    let local_center = mesh_culling_data[input_index].aabb_center.xyz;
    let half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;
    let transformed_center = world_from_local * vec4(local_center, 1.0);
    if (!view_frustum_intersects_obb(world_from_local, transformed_center, half_extents)) {
        return;
    }
#endif

    // Compute the inverse transpose of the model matrix, step by step:
    // un-transpose, invert, transpose again.
    let world_from_local_affine = transpose(world_from_local_affine_transpose);
    let local_from_world_affine = maths::inverse_affine3(world_from_local_affine);
    let local_from_world_transpose = transpose(local_from_world_affine);

    // Pack the nine inverse-transpose values into eight floats plus one.
    let packed_a = mat2x4<f32>(
        vec4<f32>(local_from_world_transpose[0].xyz, local_from_world_transpose[1].x),
        vec4<f32>(local_from_world_transpose[1].yz, local_from_world_transpose[2].xy));
    let packed_b = local_from_world_transpose[2].z;

    // Use last frame's transform when the mesh existed last frame; otherwise
    // fall back to the current transform.
    let previous_input_index = current_input[input_index].previous_input_index;
    var previous_world_from_local = world_from_local_affine_transpose;
    if (previous_input_index != 0xffffffffu) {
        previous_world_from_local = previous_input[previous_input_index].world_from_local;
    }

    // Gather the fields that are copied through unchanged.
    let mesh_flags = current_input[input_index].flags;
    let lightmap_uv_rect = current_input[input_index].lightmap_uv_rect;
    let first_vertex_index = current_input[input_index].first_vertex_index;

    // Choose the output slot. In indirect mode, `output_index` names the
    // indirect parameters of the draw: atomically reserve the next instance
    // of that draw and offset it by the draw's base instance index. In direct
    // mode, the slot was supplied by the CPU.
#ifdef INDIRECT
    let base_instance = indirect_parameters[output_index].instance_index;
    let reserved_instance = atomicAdd(&indirect_parameters[output_index].instance_count, 1u);
    let mesh_output_index = base_instance + reserved_instance;
#else
    let mesh_output_index = output_index;
#endif

    // Emit the finished `Mesh`.
    output[mesh_output_index].world_from_local = world_from_local_affine_transpose;
    output[mesh_output_index].previous_world_from_local = previous_world_from_local;
    output[mesh_output_index].local_from_world_transpose_a = packed_a;
    output[mesh_output_index].local_from_world_transpose_b = packed_b;
    output[mesh_output_index].flags = mesh_flags;
    output[mesh_output_index].lightmap_uv_rect = lightmap_uv_rect;
    output[mesh_output_index].first_vertex_index = first_vertex_index;
}