From c061ec33c8eac477bbdf09094289027e75f5c325 Mon Sep 17 00:00:00 2001
From: Robert Swain
Date: Tue, 14 Dec 2021 23:42:35 +0000
Subject: [PATCH] bevy_pbr2: Fix clustering for orthographic projections (#3316)

# Objective

PBR lighting was broken in the new renderer when using orthographic projections due to the way the depth slicing works for the clusters. Fix it.

## Solution

- The default orthographic projection near plane is 0.0. The perspective projection depth slicing does a division by the near plane, which gives a floating-point NaN, and the clustering breaks down entirely.
- Orthographic projections have a linear depth mapping, so it made intuitive sense to me to do depth slicing with a linear mapping too. The alternative I saw was to try to handle the near plane being at 0.0 and using the exponential depth slicing, but that felt like a hack that didn't make sense.
- As such, I have added code that detects whether the projection is orthographic based on `projection[3][3] == 1.0` and then implemented the orthographic mapping case throughout (when computing cluster AABBs, and when mapping a view space position (or light) to a cluster id in both the Rust and shader code).

## Screenshots Before: ![before](https://user-images.githubusercontent.com/302146/145847278-5b1bca74-fbad-4cc5-8b49-384f6a377fdc.png) After: Screenshot 2021-12-13 at 16 36 53 Old renderer (slightly lighter due to slight difference in configured intensity): Screenshot 2021-12-13 at 16 42 23 --- crates/bevy_pbr/src/light.rs | 93 +++++++++++++------ crates/bevy_pbr/src/render/light.rs | 43 +++++++-- crates/bevy_pbr/src/render/mesh.rs | 11 +-- .../src/render/mesh_view_bind_group.wgsl | 7 ++ crates/bevy_pbr/src/render/pbr.wgsl | 86 ++++++++++------- 5 files changed, 165 insertions(+), 75 deletions(-) diff --git a/crates/bevy_pbr/src/light.rs b/crates/bevy_pbr/src/light.rs index 12fcc2dd86..b80f4fe8b7 100644 --- a/crates/bevy_pbr/src/light.rs +++ b/crates/bevy_pbr/src/light.rs @@ -12,7 +12,8 @@ use bevy_transform::components::GlobalTransform; use bevy_window::Windows; use crate::{ - CubeMapFace, CubemapVisibleEntities, ViewClusterBindings, CUBE_MAP_FACES, POINT_LIGHT_NEAR_Z, + calculate_cluster_factors, CubeMapFace, CubemapVisibleEntities, ViewClusterBindings, + CUBE_MAP_FACES, POINT_LIGHT_NEAR_Z, }; /// A light that emits light in all directions from a central point. 
@@ -265,12 +266,14 @@ fn line_intersection_to_z_plane(origin: Vec3, p: Vec3, z: f32) -> Vec3 { origin + t * v } +#[allow(clippy::too_many_arguments)] fn compute_aabb_for_cluster( z_near: f32, z_far: f32, tile_size: Vec2, screen_size: Vec2, inverse_projection: Mat4, + is_orthographic: bool, cluster_dimensions: UVec3, ijk: UVec3, ) -> Aabb { @@ -280,25 +283,52 @@ fn compute_aabb_for_cluster( let p_min = ijk.xy() * tile_size; let p_max = p_min + tile_size; - // Convert to view space at the near plane - // NOTE: 1.0 is the near plane due to using reverse z projections - let p_min = screen_to_view(screen_size, inverse_projection, p_min, 1.0); - let p_max = screen_to_view(screen_size, inverse_projection, p_max, 1.0); + let cluster_min; + let cluster_max; + if is_orthographic { + // Use linear depth slicing for orthographic - let z_far_over_z_near = -z_far / -z_near; - let cluster_near = -z_near * z_far_over_z_near.powf(ijk.z / cluster_dimensions.z as f32); - // NOTE: This could be simplified to: - // let cluster_far = cluster_near * z_far_over_z_near; - let cluster_far = -z_near * z_far_over_z_near.powf((ijk.z + 1.0) / cluster_dimensions.z as f32); + // Convert to view space at the cluster near and far planes + // NOTE: 1.0 is the near plane due to using reverse z projections + let p_min = screen_to_view( + screen_size, + inverse_projection, + p_min, + 1.0 - (ijk.z / cluster_dimensions.z as f32), + ) + .xyz(); + let p_max = screen_to_view( + screen_size, + inverse_projection, + p_max, + 1.0 - ((ijk.z + 1.0) / cluster_dimensions.z as f32), + ) + .xyz(); - // Calculate the four intersection points of the min and max points with the cluster near and far planes - let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near); - let p_min_far = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_far); - let p_max_near = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_near); - let p_max_far = 
line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_far); + cluster_min = p_min.min(p_max); + cluster_max = p_min.max(p_max); + } else { + // Convert to view space at the near plane + // NOTE: 1.0 is the near plane due to using reverse z projections + let p_min = screen_to_view(screen_size, inverse_projection, p_min, 1.0); + let p_max = screen_to_view(screen_size, inverse_projection, p_max, 1.0); - let cluster_min = p_min_near.min(p_min_far).min(p_max_near.min(p_max_far)); - let cluster_max = p_min_near.max(p_min_far).max(p_max_near.max(p_max_far)); + let z_far_over_z_near = -z_far / -z_near; + let cluster_near = -z_near * z_far_over_z_near.powf(ijk.z / cluster_dimensions.z as f32); + // NOTE: This could be simplified to: + // cluster_far = cluster_near * z_far_over_z_near; + let cluster_far = + -z_near * z_far_over_z_near.powf((ijk.z + 1.0) / cluster_dimensions.z as f32); + + // Calculate the four intersection points of the min and max points with the cluster near and far planes + let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near); + let p_min_far = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_far); + let p_max_near = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_near); + let p_max_far = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_far); + + cluster_min = p_min_near.min(p_min_far).min(p_max_near.min(p_max_far)); + cluster_max = p_min_near.max(p_min_far).max(p_max_near.max(p_max_far)); + } Aabb::from_min_max(cluster_min, cluster_max) } @@ -322,6 +352,7 @@ pub fn add_clusters( pub fn update_clusters(windows: Res, mut views: Query<(&Camera, &mut Clusters)>) { for (camera, mut clusters) in views.iter_mut() { + let is_orthographic = camera.projection_matrix.w_axis.w == 1.0; let inverse_projection = camera.projection_matrix.inverse(); let window = windows.get(camera.window).unwrap(); let screen_size_u32 = UVec2::new(window.physical_width(), window.physical_height()); @@ -348,6 
+379,7 @@ pub fn update_clusters(windows: Res, mut views: Query<(&Camera, &mut Cl tile_size, screen_size, inverse_projection, + is_orthographic, clusters.axis_slices, UVec3::new(x, y, z), )); @@ -383,14 +415,20 @@ impl VisiblePointLights { } } -fn view_z_to_z_slice(cluster_factors: Vec2, view_z: f32) -> u32 { - // NOTE: had to use -view_z to make it positive else log(negative) is nan - ((-view_z).ln() * cluster_factors.x - cluster_factors.y).floor() as u32 +fn view_z_to_z_slice(cluster_factors: Vec2, view_z: f32, is_orthographic: bool) -> u32 { + if is_orthographic { + // NOTE: view_z is correct in the orthographic case + ((view_z - cluster_factors.x) * cluster_factors.y).floor() as u32 + } else { + // NOTE: had to use -view_z to make it positive else log(negative) is nan + ((-view_z).ln() * cluster_factors.x - cluster_factors.y).floor() as u32 + } } fn ndc_position_to_cluster( cluster_dimensions: UVec3, cluster_factors: Vec2, + is_orthographic: bool, ndc_p: Vec3, view_z: f32, ) -> UVec3 { @@ -398,7 +436,7 @@ fn ndc_position_to_cluster( let frag_coord = (ndc_p.xy() * Vec2::new(0.5, -0.5) + Vec2::splat(0.5)).clamp(Vec2::ZERO, Vec2::ONE); let xy = (frag_coord * cluster_dimensions_f32.xy()).floor(); - let z_slice = view_z_to_z_slice(cluster_factors, view_z); + let z_slice = view_z_to_z_slice(cluster_factors, view_z, is_orthographic); xy.as_uvec2() .extend(z_slice) .clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE) @@ -421,11 +459,12 @@ pub fn assign_lights_to_clusters( let view_transform = view_transform.compute_matrix(); let inverse_view_transform = view_transform.inverse(); let cluster_count = clusters.aabbs.len(); - let z_slices_of_ln_zfar_over_znear = - clusters.axis_slices.z as f32 / (camera.far / camera.near).ln(); - let cluster_factors = Vec2::new( - z_slices_of_ln_zfar_over_znear, - camera.near.ln() * z_slices_of_ln_zfar_over_znear, + let is_orthographic = camera.projection_matrix.w_axis.w == 1.0; + let cluster_factors = calculate_cluster_factors( + 
camera.near, + camera.far, + clusters.axis_slices.z as f32, + is_orthographic, ); let mut clusters_lights = @@ -501,12 +540,14 @@ pub fn assign_lights_to_clusters( let min_cluster = ndc_position_to_cluster( clusters.axis_slices, cluster_factors, + is_orthographic, light_aabb_ndc_min, light_aabb_view_min.z, ); let max_cluster = ndc_position_to_cluster( clusters.axis_slices, cluster_factors, + is_orthographic, light_aabb_ndc_max, light_aabb_view_max.z, ); diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index e1121e7b6d..213254ee01 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -10,7 +10,7 @@ use bevy_ecs::{ prelude::*, system::{lifetimeless::*, SystemParamItem}, }; -use bevy_math::{const_vec3, Mat4, UVec3, UVec4, Vec3, Vec4, Vec4Swizzles}; +use bevy_math::{const_vec3, Mat4, UVec3, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles}; use bevy_render::{ camera::{Camera, CameraProjection}, color::Color, @@ -540,6 +540,22 @@ pub enum LightEntity { face_index: usize, }, } +pub fn calculate_cluster_factors( + near: f32, + far: f32, + z_slices: f32, + is_orthographic: bool, +) -> Vec2 { + if is_orthographic { + Vec2::new(-near, z_slices / (-far - -near)) + } else { + let z_slices_of_ln_zfar_over_znear = z_slices / (far / near).ln(); + Vec2::new( + z_slices_of_ln_zfar_over_znear, + near.ln() * z_slices_of_ln_zfar_over_znear, + ) + } +} #[allow(clippy::too_many_arguments)] pub fn prepare_lights( @@ -644,8 +660,14 @@ pub fn prepare_lights( ); let mut view_lights = Vec::new(); - let z_times_ln_far_over_near = - clusters.axis_slices.z as f32 / (extracted_view.far / extracted_view.near).ln(); + let is_orthographic = extracted_view.projection.w_axis.w == 1.0; + let cluster_factors_zw = calculate_cluster_factors( + extracted_view.near, + extracted_view.far, + clusters.axis_slices.z as f32, + is_orthographic, + ); + let mut gpu_lights = GpuLights { directional_lights: [GpuDirectionalLight::default(); 
MAX_DIRECTIONAL_LIGHTS], ambient_color: Vec4::from_slice(&ambient_light.color.as_linear_rgba_f32()) @@ -653,8 +675,8 @@ pub fn prepare_lights( cluster_factors: Vec4::new( clusters.axis_slices.x as f32 / extracted_view.width as f32, clusters.axis_slices.y as f32 / extracted_view.height as f32, - z_times_ln_far_over_near, - extracted_view.near.ln() * z_times_ln_far_over_near, + cluster_factors_zw.x, + cluster_factors_zw.y, ), cluster_dimensions: clusters.axis_slices.extend(0), n_directional_lights: directional_lights.iter().len() as u32, @@ -855,15 +877,16 @@ const CLUSTER_COUNT_MASK: u32 = (1 << 8) - 1; const POINT_LIGHT_INDEX_MASK: u32 = (1 << 8) - 1; // NOTE: With uniform buffer max binding size as 16384 bytes -// that means we can fit say 128 point lights in one uniform -// buffer, which means the count can be at most 128 so it -// needs 7 bits, use 8 for convenience. +// that means we can fit say 256 point lights in one uniform +// buffer, which means the count can be at most 256 so it +// needs 8 bits. // The array of indices can also use u8 and that means the // offset in to the array of indices needs to be able to address -// 16384 values. lod2(16384) = 21 bits. +// 16384 values. log2(16384) = 14 bits. // This means we can pack the offset into the upper 24 bits of a u32 // and the count into the lower 8 bits. -// FIXME: Probably there are endianness concerns here????!!!!! 
+// NOTE: This assumes CPU and GPU endianness are the same which is true +// for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs fn pack_offset_and_count(offset: usize, count: usize) -> u32 { ((offset as u32 & CLUSTER_OFFSET_MASK) << CLUSTER_COUNT_SIZE) | (count as u32 & CLUSTER_COUNT_MASK) diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index dbc94e5fcb..ceebb03e66 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -245,7 +245,7 @@ impl FromWorld for MeshPipeline { ty: BufferBindingType::Uniform, has_dynamic_offset: false, // NOTE: Static size for uniform buffers. GpuPointLight has a padded - // size of 128 bytes, so 16384 / 128 = 128 point lights max + // size of 64 bytes, so 16384 / 64 = 256 point lights max min_binding_size: BufferSize::new(16384), }, count: None, @@ -257,8 +257,7 @@ impl FromWorld for MeshPipeline { ty: BindingType::Buffer { ty: BufferBindingType::Uniform, has_dynamic_offset: false, - // NOTE: With 128 point lights max, indices need 7 bits. Use u8 for - // convenience. + // NOTE: With 256 point lights max, indices need 8 bits so use u8 min_binding_size: BufferSize::new(16384), }, count: None, @@ -270,10 +269,10 @@ impl FromWorld for MeshPipeline { ty: BindingType::Buffer { ty: BufferBindingType::Uniform, has_dynamic_offset: false, - // NOTE: The offset needs to address 16384 indices, which needs 21 bits. - // The count can be at most all 128 lights so 7 bits. + // NOTE: The offset needs to address 16384 indices, which needs 14 bits. + // The count can be at most all 256 lights so 8 bits. // Pack the offset into the upper 24 bits and the count into the - // lower 8 bits for convenience. + // lower 8 bits. 
min_binding_size: BufferSize::new(16384), }, count: None, diff --git a/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl b/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl index d4d506fba1..a6d4b0d238 100644 --- a/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl +++ b/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl @@ -43,8 +43,15 @@ struct Lights { // x/y/z dimensions cluster_dimensions: vec4; // xy are vec2(cluster_dimensions.xy) / vec2(view.width, view.height) + // + // For perspective projections: // z is cluster_dimensions.z / log(far / near) // w is cluster_dimensions.z * log(near) / log(far / near) + // + // For orthographic projections: + // NOTE: near and far are +ve but -z is infront of the camera + // z is -near + // w is cluster_dimensions.z / (-far - -near) cluster_factors: vec4; n_directional_lights: u32; }; diff --git a/crates/bevy_pbr/src/render/pbr.wgsl b/crates/bevy_pbr/src/render/pbr.wgsl index ec240265b9..0d25eedd1a 100644 --- a/crates/bevy_pbr/src/render/pbr.wgsl +++ b/crates/bevy_pbr/src/render/pbr.wgsl @@ -239,14 +239,19 @@ fn reinhard_extended_luminance(color: vec3, max_white_l: f32) -> vec3 return change_luminance(color, l_new); } -fn view_z_to_z_slice(view_z: f32) -> u32 { - // NOTE: had to use -view_z to make it positive else log(negative) is nan - return u32(floor(log(-view_z) * lights.cluster_factors.z - lights.cluster_factors.w)); +fn view_z_to_z_slice(view_z: f32, is_orthographic: bool) -> u32 { + if (is_orthographic) { + // NOTE: view_z is correct in the orthographic case + return u32(floor((view_z - lights.cluster_factors.z) * lights.cluster_factors.w)); + } else { + // NOTE: had to use -view_z to make it positive else log(negative) is nan + return u32(floor(log(-view_z) * lights.cluster_factors.z - lights.cluster_factors.w)); + } } -fn fragment_cluster_index(frag_coord: vec2, view_z: f32) -> u32 { +fn fragment_cluster_index(frag_coord: vec2, view_z: f32, is_orthographic: bool) -> u32 { let xy = vec2(floor(frag_coord 
* lights.cluster_factors.xy)); - let z_slice = view_z_to_z_slice(view_z); + let z_slice = view_z_to_z_slice(view_z, is_orthographic); return (xy.y * lights.cluster_dimensions.x + xy.x) * lights.cluster_dimensions.z + z_slice; } @@ -423,6 +428,10 @@ fn hsv2rgb(hue: f32, saturation: f32, value: f32) -> vec3 { return value * mix( vec3(1.0), rgb, vec3(saturation)); } +fn random1D(s: f32) -> f32 { + return fract(sin(s * 12.9898) * 43758.5453123); +} + struct FragmentInput { [[builtin(front_facing)]] is_front: bool; [[builtin(position)]] frag_coord: vec4; @@ -508,12 +517,14 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4 { } var V: vec3; - if (view.projection[3].w != 1.0) { // If the projection is not orthographic + // If the projection is not orthographic + let is_orthographic = view.projection[3].w == 1.0; + if (is_orthographic) { + // Orthographic view vector + V = normalize(vec3(view.view_proj[0].z, view.view_proj[1].z, view.view_proj[2].z)); + } else { // Only valid for a perpective projection V = normalize(view.world_position.xyz - in.world_position.xyz); - } else { - // Ortho view vec - V = normalize(vec3(view.view_proj[0].z, view.view_proj[1].z, view.view_proj[2].z)); } // Neubelt and Pettineo 2013, "Crafting a Next-gen Material Pipeline for The Order: 1886" @@ -538,7 +549,7 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4 { view.inverse_view[2].z, view.inverse_view[3].z ), in.world_position); - let cluster_index = fragment_cluster_index(in.frag_coord.xy, view_z); + let cluster_index = fragment_cluster_index(in.frag_coord.xy, view_z, is_orthographic); let offset_and_count = unpack_offset_and_count(cluster_index); for (var i: u32 = offset_and_count.offset; i < offset_and_count.offset + offset_and_count.count; i = i + 1u) { let light_id = get_light_id(i); @@ -573,31 +584,40 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4 { emissive.rgb * output_color.a, output_color.a); -#ifdef CLUSTERED_FORWARD_DEBUG // Cluster allocation debug (using 
'over' alpha blending) - let cluster_debug_mode = 1; - let cluster_overlay_alpha = 1.0; - if (cluster_debug_mode == 0) { - // NOTE: This debug mode visualises the z-slices - var z_slice: u32 = view_z_to_z_slice(view_z); - // A hack to make the colors alternate a bit more - if ((z_slice & 1u) == 1u) { - z_slice = z_slice + lights.cluster_dimensions.z / 2u; - } - let slice_color = hsv2rgb(f32(z_slice) / f32(lights.cluster_dimensions.z + 1u), 1.0, 0.5); - output_color = vec4( - (1.0 - cluster_overlay_alpha) * output_color.rgb + cluster_overlay_alpha * slice_color, - output_color.a - ); - } elseif (cluster_debug_mode == 1) { - // NOTE: This debug mode visualises the number of lights within the cluster that contains - // the fragment. It shows a sort of lighting complexity measure. - output_color.r = (1.0 - cluster_overlay_alpha) * output_color.r - + cluster_overlay_alpha * smoothStep(0.0, 16.0, f32(offset_and_count.count)); - output_color.g = (1.0 - cluster_overlay_alpha) * output_color.g - + cluster_overlay_alpha * (1.0 - smoothStep(0.0, 16.0, f32(offset_and_count.count))); +#ifdef CLUSTERED_FORWARD_DEBUG_Z_SLICES + // NOTE: This debug mode visualises the z-slices + let cluster_overlay_alpha = 0.1; + var z_slice: u32 = view_z_to_z_slice(view_z, is_orthographic); + // A hack to make the colors alternate a bit more + if ((z_slice & 1u) == 1u) { + z_slice = z_slice + lights.cluster_dimensions.z / 2u; } -#endif + let slice_color = hsv2rgb(f32(z_slice) / f32(lights.cluster_dimensions.z + 1u), 1.0, 0.5); + output_color = vec4( + (1.0 - cluster_overlay_alpha) * output_color.rgb + cluster_overlay_alpha * slice_color, + output_color.a + ); +#endif // CLUSTERED_FORWARD_DEBUG_Z_SLICES +#ifdef CLUSTERED_FORWARD_DEBUG_CLUSTER_LIGHT_COMPLEXITY + // NOTE: This debug mode visualises the number of lights within the cluster that contains + // the fragment. It shows a sort of lighting complexity measure. 
+ let cluster_overlay_alpha = 0.1; + let max_light_complexity_per_cluster = 64.0; + output_color.r = (1.0 - cluster_overlay_alpha) * output_color.r + + cluster_overlay_alpha * smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count.count)); + output_color.g = (1.0 - cluster_overlay_alpha) * output_color.g + + cluster_overlay_alpha * (1.0 - smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count.count))); +#endif // CLUSTERED_FORWARD_DEBUG_CLUSTER_LIGHT_COMPLEXITY +#ifdef CLUSTERED_FORWARD_DEBUG_CLUSTER_COHERENCY + // NOTE: Visualizes the cluster to which the fragment belongs + let cluster_overlay_alpha = 0.1; + let cluster_color = hsv2rgb(random1D(f32(cluster_index)), 1.0, 0.5); + output_color = vec4( + (1.0 - cluster_overlay_alpha) * output_color.rgb + cluster_overlay_alpha * cluster_color, + output_color.a + ); +#endif // CLUSTERED_FORWARD_DEBUG_CLUSTER_COHERENCY // tone_mapping output_color = vec4(reinhard_luminance(output_color.rgb), output_color.a);