2024-03-25 19:08:27 +00:00
|
|
|
#define_import_path bevy_pbr::meshlet_visibility_buffer_resolve
|
|
|
|
|
|
|
|
#import bevy_pbr::{
|
|
|
|
meshlet_bindings::{
|
|
|
|
meshlet_visibility_buffer,
|
2024-05-04 19:56:19 +00:00
|
|
|
meshlet_cluster_meshlet_ids,
|
2024-03-25 19:08:27 +00:00
|
|
|
meshlets,
|
|
|
|
meshlet_vertex_ids,
|
|
|
|
meshlet_vertex_data,
|
2024-05-04 19:56:19 +00:00
|
|
|
meshlet_cluster_instance_ids,
|
2024-03-25 19:08:27 +00:00
|
|
|
meshlet_instance_uniforms,
|
|
|
|
get_meshlet_index,
|
|
|
|
unpack_meshlet_vertex,
|
|
|
|
},
|
|
|
|
mesh_view_bindings::view,
|
2024-06-10 20:18:43 +00:00
|
|
|
mesh_functions::{mesh_position_local_to_world, sign_determinant_model_3x3m},
|
|
|
|
mesh_types::{Mesh, MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT},
|
2024-03-25 19:08:27 +00:00
|
|
|
view_transformations::{position_world_to_clip, frag_coord_to_ndc},
|
|
|
|
}
|
|
|
|
#import bevy_render::maths::{affine3_to_square, mat2x4_f32_to_mat3x3_unpack}
|
|
|
|
|
|
|
|
#ifdef PREPASS_FRAGMENT
|
|
|
|
#ifdef MOTION_VECTOR_PREPASS
|
|
|
|
#import bevy_pbr::{
|
2024-04-07 18:59:16 +00:00
|
|
|
prepass_bindings::previous_view_uniforms,
|
2024-03-25 19:08:27 +00:00
|
|
|
pbr_prepass_functions::calculate_motion_vector,
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/// Functions to be used by materials for reading from a meshlet visibility buffer texture.
|
|
|
|
|
|
|
|
#ifdef MESHLET_MESH_MATERIAL_PASS
|
|
|
|
// Perspective-correct barycentric coordinates for a pixel inside a triangle,
// plus their screen-space gradients (used to reconstruct UV derivatives for
// texture mip selection).
struct PartialDerivatives {
    // Barycentric weights of the pixel w.r.t. the triangle's three vertices.
    barycentrics: vec3<f32>,
    // Change in barycentrics per one-pixel step in screen-space X.
    ddx: vec3<f32>,
    // Change in barycentrics per one-pixel step in screen-space Y.
    ddy: vec3<f32>,
}
|
|
|
|
|
|
|
|
// https://github.com/ConfettiFX/The-Forge/blob/2d453f376ef278f66f97cbaf36c0d12e4361e275/Examples_3/Visibility_Buffer/src/Shaders/FSL/visibilityBuffer_shade.frag.fsl#L83-L139
//
// Computes perspective-correct barycentric coordinates for the point `ndc_uv`
// inside the triangle given by `vertex_clip_positions`, along with the
// per-pixel partial derivatives (ddx/ddy) of those barycentrics.
fn compute_partial_derivatives(vertex_clip_positions: array<vec4<f32>, 3>, ndc_uv: vec2<f32>, screen_size: vec2<f32>) -> PartialDerivatives {
    var result: PartialDerivatives;

    // Perspective divide: project the three clip-space vertices to NDC.
    let inv_w = 1.0 / vec3(vertex_clip_positions[0].w, vertex_clip_positions[1].w, vertex_clip_positions[2].w);
    let ndc_0 = vertex_clip_positions[0].xy * inv_w[0];
    let ndc_1 = vertex_clip_positions[1].xy * inv_w[1];
    let ndc_2 = vertex_clip_positions[2].xy * inv_w[2];

    // Inverse of twice the signed triangle area in NDC; scales the edge deltas
    // below into barycentric gradients. NOTE(review): a degenerate (zero-area)
    // triangle yields inf/NaN here — presumably acceptable since such a
    // triangle covers no pixels; confirm against the raster passes.
    let inv_det = 1.0 / determinant(mat2x2(ndc_2 - ndc_1, ndc_0 - ndc_1));
    // Gradients of the perspective-weighted (1/w-scaled) barycentrics with
    // respect to NDC x and y.
    result.ddx = vec3(ndc_1.y - ndc_2.y, ndc_2.y - ndc_0.y, ndc_0.y - ndc_1.y) * inv_det * inv_w;
    result.ddy = vec3(ndc_2.x - ndc_1.x, ndc_0.x - ndc_2.x, ndc_1.x - ndc_0.x) * inv_det * inv_w;

    var ddx_sum = dot(result.ddx, vec3(1.0));
    var ddy_sum = dot(result.ddy, vec3(1.0));

    // Interpolate 1/w at the sample point and invert to recover w for
    // perspective correction.
    let delta_v = ndc_uv - ndc_0;
    let interp_inv_w = inv_w.x + delta_v.x * ddx_sum + delta_v.y * ddy_sum;
    let interp_w = 1.0 / interp_inv_w;

    // Perspective-correct barycentrics at the sample point.
    result.barycentrics = vec3(
        interp_w * (delta_v.x * result.ddx.x + delta_v.y * result.ddy.x + inv_w.x),
        interp_w * (delta_v.x * result.ddx.y + delta_v.y * result.ddy.y),
        interp_w * (delta_v.x * result.ddx.z + delta_v.y * result.ddy.z),
    );

    // Rescale gradients from NDC units (the viewport spans 2 NDC units per
    // axis) to one-pixel steps.
    result.ddx *= 2.0 / screen_size.x;
    result.ddy *= 2.0 / screen_size.y;
    ddx_sum *= 2.0 / screen_size.x;
    ddy_sum *= 2.0 / screen_size.y;

    // w one pixel to the right / one pixel down.
    let interp_ddx_w = 1.0 / (interp_inv_w + ddx_sum);
    let interp_ddy_w = 1.0 / (interp_inv_w + ddy_sum);

    // Finite differences: barycentrics at the neighboring pixel minus the
    // barycentrics here.
    result.ddx = interp_ddx_w * (result.barycentrics * interp_inv_w + result.ddx) - result.barycentrics;
    result.ddy = interp_ddy_w * (result.barycentrics * interp_inv_w + result.ddy) - result.barycentrics;
    return result;
}
|
|
|
|
|
|
|
|
// Mirror of the VertexOutput a regular material pass would receive from the
// vertex shader, except reconstructed per-pixel from the visibility buffer.
struct VertexOutput {
    // Fragment position (the frag_coord this output was resolved from).
    position: vec4<f32>,
    // Interpolated world-space position of the surface point.
    world_position: vec4<f32>,
    // Interpolated world-space normal.
    world_normal: vec3<f32>,
    // Interpolated vertex UV.
    uv: vec2<f32>,
    // Analytic screen-space UV gradients (substitute for hardware
    // dpdx/dpdy, which are unavailable when shading from a visibility
    // buffer).
    ddx_uv: vec2<f32>,
    ddy_uv: vec2<f32>,
    // Interpolated world-space tangent; w holds handedness.
    world_tangent: vec4<f32>,
    // Flags from the instance's Mesh uniform.
    mesh_flags: u32,
    // ID of the cluster this pixel was rasterized from.
    cluster_id: u32,
#ifdef PREPASS_FRAGMENT
#ifdef MOTION_VECTOR_PREPASS
    // Screen-space motion vector (only in the motion-vector prepass).
    motion_vector: vec2<f32>,
#endif
#endif
}
|
|
|
|
|
|
|
|
/// Load the visibility buffer texture and resolve it into a VertexOutput.
fn resolve_vertex_output(frag_coord: vec4<f32>) -> VertexOutput {
    // The visibility buffer is a flat buffer indexed in row-major order.
    let frag_coord_1d = u32(frag_coord.y) * u32(view.viewport.z) + u32(frag_coord.x);
    // Lower 32 bits of the 64-bit visbuffer entry: packed cluster (upper
    // 25 bits) + triangle (lower 7 bits) IDs.
    let packed_ids = u32(meshlet_visibility_buffer[frag_coord_1d]); // TODO: Might be faster to load the correct u32 directly
    let cluster_id = packed_ids >> 7u;
    let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id];
    let meshlet = meshlets[meshlet_id];

    // 7 bits of triangle ID (max 128 triangles per meshlet).
    let triangle_id = extractBits(packed_ids, 0u, 7u);
    // Resolve the triangle's three indices into meshlet-local vertex slots,
    // then into global vertex IDs, and finally unpack the vertex data.
    let index_ids = meshlet.start_index_id + (triangle_id * 3u) + vec3(0u, 1u, 2u);
    let indices = meshlet.start_vertex_id + vec3(get_meshlet_index(index_ids.x), get_meshlet_index(index_ids.y), get_meshlet_index(index_ids.z));
    let vertex_ids = vec3(meshlet_vertex_ids[indices.x], meshlet_vertex_ids[indices.y], meshlet_vertex_ids[indices.z]);
    let vertex_1 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.x]);
    let vertex_2 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.y]);
    let vertex_3 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.z]);

    // Per-instance mesh uniform (transforms + flags) for this cluster.
    let instance_id = meshlet_cluster_instance_ids[cluster_id];
    var instance_uniform = meshlet_instance_uniforms[instance_id];

    // Transform the triangle to world and then clip space so we can rederive
    // the barycentrics of this pixel within it.
    let world_from_local = affine3_to_square(instance_uniform.world_from_local);
    let world_position_1 = mesh_position_local_to_world(world_from_local, vec4(vertex_1.position, 1.0));
    let world_position_2 = mesh_position_local_to_world(world_from_local, vec4(vertex_2.position, 1.0));
    let world_position_3 = mesh_position_local_to_world(world_from_local, vec4(vertex_3.position, 1.0));

    let clip_position_1 = position_world_to_clip(world_position_1.xyz);
    let clip_position_2 = position_world_to_clip(world_position_2.xyz);
    let clip_position_3 = position_world_to_clip(world_position_3.xyz);
    let frag_coord_ndc = frag_coord_to_ndc(frag_coord).xy;
    let partial_derivatives = compute_partial_derivatives(
        array(clip_position_1, clip_position_2, clip_position_3),
        frag_coord_ndc,
        view.viewport.zw,
    );

    // Interpolate the vertex attributes with the recovered barycentrics.
    // mat3xN * barycentrics = weighted sum of the three per-vertex columns.
    let world_position = mat3x4(world_position_1, world_position_2, world_position_3) * partial_derivatives.barycentrics;
    let world_normal = mat3x3(
        normal_local_to_world(vertex_1.normal, &instance_uniform),
        normal_local_to_world(vertex_2.normal, &instance_uniform),
        normal_local_to_world(vertex_3.normal, &instance_uniform),
    ) * partial_derivatives.barycentrics;
    let uv = mat3x2(vertex_1.uv, vertex_2.uv, vertex_3.uv) * partial_derivatives.barycentrics;
    // UV gradients come from the barycentric gradients (no hardware
    // derivatives available here).
    let ddx_uv = mat3x2(vertex_1.uv, vertex_2.uv, vertex_3.uv) * partial_derivatives.ddx;
    let ddy_uv = mat3x2(vertex_1.uv, vertex_2.uv, vertex_3.uv) * partial_derivatives.ddy;
    let world_tangent = mat3x4(
        tangent_local_to_world(vertex_1.tangent, world_from_local, instance_uniform.flags),
        tangent_local_to_world(vertex_2.tangent, world_from_local, instance_uniform.flags),
        tangent_local_to_world(vertex_3.tangent, world_from_local, instance_uniform.flags),
    ) * partial_derivatives.barycentrics;

#ifdef PREPASS_FRAGMENT
#ifdef MOTION_VECTOR_PREPASS
    // Re-interpolate the same barycentrics against last frame's transform to
    // get the previous world position for the motion vector.
    let previous_world_from_local = affine3_to_square(instance_uniform.previous_world_from_local);
    let previous_world_position_1 = mesh_position_local_to_world(previous_world_from_local, vec4(vertex_1.position, 1.0));
    let previous_world_position_2 = mesh_position_local_to_world(previous_world_from_local, vec4(vertex_2.position, 1.0));
    let previous_world_position_3 = mesh_position_local_to_world(previous_world_from_local, vec4(vertex_3.position, 1.0));
    let previous_world_position = mat3x4(previous_world_position_1, previous_world_position_2, previous_world_position_3) * partial_derivatives.barycentrics;
    let motion_vector = calculate_motion_vector(world_position, previous_world_position);
#endif
#endif

    return VertexOutput(
        frag_coord,
        world_position,
        world_normal,
        uv,
        ddx_uv,
        ddy_uv,
        world_tangent,
        instance_uniform.flags,
        cluster_id,
#ifdef PREPASS_FRAGMENT
#ifdef MOTION_VECTOR_PREPASS
        motion_vector,
#endif
#endif
    );
}
|
2024-06-10 20:18:43 +00:00
|
|
|
|
|
|
|
// Transform a vertex normal from local to world space using the inverse
// transpose of the model matrix (unpacked from the instance uniform).
// An all-zero normal is passed through unchanged.
fn normal_local_to_world(vertex_normal: vec3<f32>, instance_uniform: ptr<function, Mesh>) -> vec3<f32> {
    // Guard: a zero normal cannot be normalized; return it as-is.
    if all(vertex_normal == vec3<f32>(0.0)) {
        return vertex_normal;
    }

    let local_from_world_transpose = mat2x4_f32_to_mat3x3_unpack(
        (*instance_uniform).local_from_world_transpose_a,
        (*instance_uniform).local_from_world_transpose_b,
    );
    return normalize(local_from_world_transpose * vertex_normal);
}
|
|
|
|
|
|
|
|
// Transform a vertex tangent from local to world space. The xyz direction is
// rotated/scaled by the upper-left 3x3 of the model matrix and renormalized;
// the w component (handedness) is sign-corrected for mirrored transforms.
// An all-zero tangent is passed through unchanged.
fn tangent_local_to_world(vertex_tangent: vec4<f32>, world_from_local: mat4x4<f32>, mesh_flags: u32) -> vec4<f32> {
    // Guard: a zero tangent cannot be normalized; return it as-is.
    if all(vertex_tangent == vec4<f32>(0.0)) {
        return vertex_tangent;
    }

    let rotation_scale = mat3x3<f32>(
        world_from_local[0].xyz,
        world_from_local[1].xyz,
        world_from_local[2].xyz,
    );
    let world_direction = normalize(rotation_scale * vertex_tangent.xyz);
    // Flip handedness when the model matrix has a negative determinant.
    let handedness = vertex_tangent.w * sign_determinant_model_3x3m(mesh_flags);
    return vec4<f32>(world_direction, handedness);
}
|
2024-03-25 19:08:27 +00:00
|
|
|
#endif
|