mirror of
https://github.com/bevyengine/bevy
synced 2025-01-17 07:34:07 +00:00
d235d41af1
The "uberbuffers" PR #14257 caused some examples to fail intermittently for different reasons: 1. `morph_targets` could fail because vertex displacements for morph targets are keyed off the vertex index. With buffer packing, the vertex index can vary based on the position in the buffer, which caused the morph targets to be potentially incorrect. The solution is to include the first vertex index with the `MeshUniform` (and `MeshInputUniform` if GPU preprocessing is in use), so that the shader can calculate the true vertex index before performing the morph operation. This results in wasted space in `MeshUniform`, which is unfortunate, but we'll soon be filling in the padding with the ID of the material when bindless textures land, so this had to happen sooner or later anyhow. Including the vertex index in the `MeshInputUniform` caused an ordering problem. The `MeshInputUniform` was created during the extraction phase, before the allocations occurred, so the extraction logic didn't know where the mesh vertex data was going to end up. The solution is to move the `MeshInputUniform` creation (the `collect_meshes_for_gpu_building` system) to after the allocations phase. This should be better for parallelism anyhow, because it allows the extraction phase to finish quicker. It's also something we'll have to do for bindless in any event. 2. The `lines` and `fog_volumes` examples could fail because their custom drawing nodes weren't updated to supply the vertex and index offsets in their `draw_indexed` and `draw` calls. This commit fixes this oversight. Fixes #14366.
194 lines
7.2 KiB
WebGPU Shading Language
194 lines
7.2 KiB
WebGPU Shading Language
// GPU mesh uniform building.
|
|
//
|
|
// This is a compute shader that expands each `MeshInputUniform` out to a full
|
|
// `MeshUniform` for each view before rendering. (Thus `MeshInputUniform`
|
|
// and `MeshUniform` are in a 1:N relationship.) It runs in parallel for all
|
|
// meshes for all views. As part of this process, the shader gathers each
|
|
// mesh's transform on the previous frame and writes it into the `MeshUniform`
|
|
// so that TAA works.
|
|
|
|
#import bevy_pbr::mesh_types::Mesh
|
|
#import bevy_render::maths
|
|
#import bevy_render::view::View
|
|
|
|
// Per-frame data that the CPU supplies to the GPU.
|
|
struct MeshInput {
|
|
// The model transform.
|
|
world_from_local: mat3x4<f32>,
|
|
// The lightmap UV rect, packed into 64 bits.
|
|
lightmap_uv_rect: vec2<u32>,
|
|
// Various flags.
|
|
flags: u32,
|
|
// The index of this mesh's `MeshInput` in the `previous_input` array, if
|
|
// applicable. If not present, this is `u32::MAX`.
|
|
previous_input_index: u32,
|
|
first_vertex_index: u32,
|
|
pad_a: u32,
|
|
pad_b: u32,
|
|
pad_c: u32,
|
|
}
|
|
|
|
// Information about each mesh instance needed to cull it on GPU.
|
|
//
|
|
// At the moment, this just consists of its axis-aligned bounding box (AABB).
|
|
struct MeshCullingData {
|
|
// The 3D center of the AABB in model space, padded with an extra unused
|
|
// float value.
|
|
aabb_center: vec4<f32>,
|
|
// The 3D extents of the AABB in model space, divided by two, padded with
|
|
// an extra unused float value.
|
|
aabb_half_extents: vec4<f32>,
|
|
}
|
|
|
|
// One invocation of this compute shader: i.e. one mesh instance in a view.
|
|
struct PreprocessWorkItem {
|
|
// The index of the `MeshInput` in the `current_input` buffer that we read
|
|
// from.
|
|
input_index: u32,
|
|
// In direct mode, the index of the `Mesh` in `output` that we write to. In
|
|
// indirect mode, the index of the `IndirectParameters` in
|
|
// `indirect_parameters` that we write to.
|
|
output_index: u32,
|
|
}
|
|
|
|
// The `wgpu` indirect parameters structure. This is a union of two structures.
|
|
// For more information, see the corresponding comment in
|
|
// `gpu_preprocessing.rs`.
|
|
struct IndirectParameters {
|
|
// `vertex_count` or `index_count`.
|
|
data0: u32,
|
|
// `instance_count` in both structures.
|
|
instance_count: atomic<u32>,
|
|
// `first_vertex` in both structures.
|
|
first_vertex: u32,
|
|
// `first_instance` or `base_vertex`.
|
|
data1: u32,
|
|
// A read-only copy of `instance_index`.
|
|
instance_index: u32,
|
|
}
|
|
|
|
// The current frame's `MeshInput`.
|
|
@group(0) @binding(0) var<storage> current_input: array<MeshInput>;
|
|
// The `MeshInput` values from the previous frame.
|
|
@group(0) @binding(1) var<storage> previous_input: array<MeshInput>;
|
|
// Indices into the `MeshInput` buffer.
|
|
//
|
|
// There may be many indices that map to the same `MeshInput`.
|
|
@group(0) @binding(2) var<storage> work_items: array<PreprocessWorkItem>;
|
|
// The output array of `Mesh`es.
|
|
@group(0) @binding(3) var<storage, read_write> output: array<Mesh>;
|
|
|
|
#ifdef INDIRECT
|
|
// The array of indirect parameters for drawcalls.
|
|
@group(0) @binding(4) var<storage, read_write> indirect_parameters: array<IndirectParameters>;
|
|
#endif
|
|
|
|
#ifdef FRUSTUM_CULLING
|
|
// Data needed to cull the meshes.
|
|
//
|
|
// At the moment, this consists only of AABBs.
|
|
@group(0) @binding(5) var<storage> mesh_culling_data: array<MeshCullingData>;
|
|
|
|
// The view data, including the view matrix.
|
|
@group(0) @binding(6) var<uniform> view: View;
|
|
|
|
// Returns true if the view frustum intersects an oriented bounding box (OBB).
|
|
//
|
|
// `aabb_center.w` should be 1.0.
|
|
fn view_frustum_intersects_obb(
|
|
world_from_local: mat4x4<f32>,
|
|
aabb_center: vec4<f32>,
|
|
aabb_half_extents: vec3<f32>,
|
|
) -> bool {
|
|
|
|
for (var i = 0; i < 5; i += 1) {
|
|
// Calculate relative radius of the sphere associated with this plane.
|
|
let plane_normal = view.frustum[i];
|
|
let relative_radius = dot(
|
|
abs(
|
|
vec3(
|
|
dot(plane_normal, world_from_local[0]),
|
|
dot(plane_normal, world_from_local[1]),
|
|
dot(plane_normal, world_from_local[2]),
|
|
)
|
|
),
|
|
aabb_half_extents
|
|
);
|
|
|
|
// Check the frustum plane.
|
|
if (!maths::sphere_intersects_plane_half_space(
|
|
plane_normal, aabb_center, relative_radius)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
@compute
|
|
@workgroup_size(64)
|
|
fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
|
|
// Figure out our instance index. If this thread doesn't correspond to any
|
|
// index, bail.
|
|
let instance_index = global_invocation_id.x;
|
|
if (instance_index >= arrayLength(&work_items)) {
|
|
return;
|
|
}
|
|
|
|
// Unpack.
|
|
let input_index = work_items[instance_index].input_index;
|
|
let output_index = work_items[instance_index].output_index;
|
|
let world_from_local_affine_transpose = current_input[input_index].world_from_local;
|
|
let world_from_local = maths::affine3_to_square(world_from_local_affine_transpose);
|
|
|
|
// Cull if necessary.
|
|
#ifdef FRUSTUM_CULLING
|
|
let aabb_center = mesh_culling_data[input_index].aabb_center.xyz;
|
|
let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz;
|
|
|
|
// Do an OBB-based frustum cull.
|
|
let model_center = world_from_local * vec4(aabb_center, 1.0);
|
|
if (!view_frustum_intersects_obb(world_from_local, model_center, aabb_half_extents)) {
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
// Calculate inverse transpose.
|
|
let local_from_world_transpose = transpose(maths::inverse_affine3(transpose(
|
|
world_from_local_affine_transpose)));
|
|
|
|
// Pack inverse transpose.
|
|
let local_from_world_transpose_a = mat2x4<f32>(
|
|
vec4<f32>(local_from_world_transpose[0].xyz, local_from_world_transpose[1].x),
|
|
vec4<f32>(local_from_world_transpose[1].yz, local_from_world_transpose[2].xy));
|
|
let local_from_world_transpose_b = local_from_world_transpose[2].z;
|
|
|
|
// Look up the previous model matrix.
|
|
let previous_input_index = current_input[input_index].previous_input_index;
|
|
var previous_world_from_local: mat3x4<f32>;
|
|
if (previous_input_index == 0xffffffff) {
|
|
previous_world_from_local = world_from_local_affine_transpose;
|
|
} else {
|
|
previous_world_from_local = previous_input[previous_input_index].world_from_local;
|
|
}
|
|
|
|
// Figure out the output index. In indirect mode, this involves bumping the
|
|
// instance index in the indirect parameters structure. Otherwise, this
|
|
// index was directly supplied to us.
|
|
#ifdef INDIRECT
|
|
let mesh_output_index = indirect_parameters[output_index].instance_index +
|
|
atomicAdd(&indirect_parameters[output_index].instance_count, 1u);
|
|
#else
|
|
let mesh_output_index = output_index;
|
|
#endif
|
|
|
|
// Write the output.
|
|
output[mesh_output_index].world_from_local = world_from_local_affine_transpose;
|
|
output[mesh_output_index].previous_world_from_local = previous_world_from_local;
|
|
output[mesh_output_index].local_from_world_transpose_a = local_from_world_transpose_a;
|
|
output[mesh_output_index].local_from_world_transpose_b = local_from_world_transpose_b;
|
|
output[mesh_output_index].flags = current_input[input_index].flags;
|
|
output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect;
|
|
output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index;
|
|
}
|