diff --git a/crates/bevy_pbr/src/cluster/assign.rs b/crates/bevy_pbr/src/cluster/assign.rs new file mode 100644 index 0000000000..95e2bc4bfb --- /dev/null +++ b/crates/bevy_pbr/src/cluster/assign.rs @@ -0,0 +1,945 @@ +//! Assigning objects to clusters. + +use bevy_ecs::{ + entity::Entity, + system::{Commands, Local, Query, Res, ResMut}, +}; +use bevy_math::{Mat4, UVec3, Vec2, Vec3, Vec3A, Vec3Swizzles as _, Vec4, Vec4Swizzles as _}; +use bevy_render::{ + camera::Camera, + primitives::{Aabb, Frustum, HalfSpace, Sphere}, + render_resource::BufferBindingType, + renderer::RenderDevice, + view::{RenderLayers, ViewVisibility}, +}; +use bevy_transform::components::GlobalTransform; +use bevy_utils::tracing::warn; + +use crate::{ + ClusterConfig, ClusterFarZMode, Clusters, GlobalVisiblePointLights, PointLight, SpotLight, + ViewClusterBindings, VisiblePointLights, CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, + MAX_UNIFORM_BUFFER_POINT_LIGHTS, +}; + +const NDC_MIN: Vec2 = Vec2::NEG_ONE; +const NDC_MAX: Vec2 = Vec2::ONE; + +const VEC2_HALF: Vec2 = Vec2::splat(0.5); +const VEC2_HALF_NEGATIVE_Y: Vec2 = Vec2::new(0.5, -0.5); + +#[derive(Clone)] +// data required for assigning lights to clusters +pub(crate) struct PointLightAssignmentData { + entity: Entity, + transform: GlobalTransform, + range: f32, + shadows_enabled: bool, + spot_light_angle: Option, + render_layers: RenderLayers, +} + +impl PointLightAssignmentData { + pub fn sphere(&self) -> Sphere { + Sphere { + center: self.transform.translation_vec3a(), + radius: self.range, + } + } +} + +// NOTE: Run this before update_point_light_frusta! 
+#[allow(clippy::too_many_arguments)] +pub(crate) fn assign_lights_to_clusters( + mut commands: Commands, + mut global_lights: ResMut, + mut views: Query<( + Entity, + &GlobalTransform, + &Camera, + &Frustum, + &ClusterConfig, + &mut Clusters, + Option<&RenderLayers>, + Option<&mut VisiblePointLights>, + )>, + point_lights_query: Query<( + Entity, + &GlobalTransform, + &PointLight, + Option<&RenderLayers>, + &ViewVisibility, + )>, + spot_lights_query: Query<( + Entity, + &GlobalTransform, + &SpotLight, + Option<&RenderLayers>, + &ViewVisibility, + )>, + mut lights: Local>, + mut cluster_aabb_spheres: Local>>, + mut max_point_lights_warning_emitted: Local, + render_device: Option>, +) { + let Some(render_device) = render_device else { + return; + }; + + global_lights.entities.clear(); + lights.clear(); + // collect just the relevant light query data into a persisted vec to avoid reallocating each frame + lights.extend( + point_lights_query + .iter() + .filter(|(.., visibility)| visibility.get()) + .map( + |(entity, transform, point_light, maybe_layers, _visibility)| { + PointLightAssignmentData { + entity, + transform: GlobalTransform::from_translation(transform.translation()), + shadows_enabled: point_light.shadows_enabled, + range: point_light.range, + spot_light_angle: None, + render_layers: maybe_layers.unwrap_or_default().clone(), + } + }, + ), + ); + lights.extend( + spot_lights_query + .iter() + .filter(|(.., visibility)| visibility.get()) + .map( + |(entity, transform, spot_light, maybe_layers, _visibility)| { + PointLightAssignmentData { + entity, + transform: *transform, + shadows_enabled: spot_light.shadows_enabled, + range: spot_light.range, + spot_light_angle: Some(spot_light.outer_angle), + render_layers: maybe_layers.unwrap_or_default().clone(), + } + }, + ), + ); + + let clustered_forward_buffer_binding_type = + render_device.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT); + let supports_storage_buffers = matches!( + 
clustered_forward_buffer_binding_type, + BufferBindingType::Storage { .. } + ); + if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !supports_storage_buffers { + lights.sort_by(|light_1, light_2| { + crate::point_light_order( + ( + &light_1.entity, + &light_1.shadows_enabled, + &light_1.spot_light_angle.is_some(), + ), + ( + &light_2.entity, + &light_2.shadows_enabled, + &light_2.spot_light_angle.is_some(), + ), + ) + }); + + // check each light against each view's frustum, keep only those that affect at least one of our views + let frusta: Vec<_> = views + .iter() + .map(|(_, _, _, frustum, _, _, _, _)| *frustum) + .collect(); + let mut lights_in_view_count = 0; + lights.retain(|light| { + // take one extra light to check if we should emit the warning + if lights_in_view_count == MAX_UNIFORM_BUFFER_POINT_LIGHTS + 1 { + false + } else { + let light_sphere = light.sphere(); + let light_in_view = frusta + .iter() + .any(|frustum| frustum.intersects_sphere(&light_sphere, true)); + + if light_in_view { + lights_in_view_count += 1; + } + + light_in_view + } + }); + + if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !*max_point_lights_warning_emitted { + warn!( + "MAX_UNIFORM_BUFFER_POINT_LIGHTS ({}) exceeded", + MAX_UNIFORM_BUFFER_POINT_LIGHTS + ); + *max_point_lights_warning_emitted = true; + } + + lights.truncate(MAX_UNIFORM_BUFFER_POINT_LIGHTS); + } + + for ( + view_entity, + camera_transform, + camera, + frustum, + config, + clusters, + maybe_layers, + mut visible_lights, + ) in &mut views + { + let view_layers = maybe_layers.unwrap_or_default(); + let clusters = clusters.into_inner(); + + if matches!(config, ClusterConfig::None) { + if visible_lights.is_some() { + commands.entity(view_entity).remove::(); + } + clusters.clear(); + continue; + } + + let Some(screen_size) = camera.physical_viewport_size() else { + clusters.clear(); + continue; + }; + + let mut requested_cluster_dimensions = config.dimensions_for_screen_size(screen_size); + + let view_transform 
= camera_transform.compute_matrix(); + let view_inv_scale = camera_transform.compute_transform().scale.recip(); + let view_inv_scale_max = view_inv_scale.abs().max_element(); + let inverse_view_transform = view_transform.inverse(); + let is_orthographic = camera.projection_matrix().w_axis.w == 1.0; + + let far_z = match config.far_z_mode() { + ClusterFarZMode::MaxLightRange => { + let inverse_view_row_2 = inverse_view_transform.row(2); + lights + .iter() + .map(|light| { + -inverse_view_row_2.dot(light.transform.translation().extend(1.0)) + + light.range * view_inv_scale.z + }) + .reduce(f32::max) + .unwrap_or(0.0) + } + ClusterFarZMode::Constant(far) => far, + }; + let first_slice_depth = match (is_orthographic, requested_cluster_dimensions.z) { + (true, _) => { + // NOTE: Based on glam's Mat4::orthographic_rh(), as used to calculate the orthographic projection + // matrix, we can calculate the projection's view-space near plane as follows: + // component 3,2 = r * near and 2,2 = r where r = 1.0 / (near - far) + // There is a caveat here that when calculating the projection matrix, near and far were swapped to give + // reversed z, consistent with the perspective projection. So, + // 3,2 = r * far and 2,2 = r where r = 1.0 / (far - near) + // rearranging r = 1.0 / (far - near), r * (far - near) = 1.0, r * far - 1.0 = r * near, near = (r * far - 1.0) / r + // = (3,2 - 1.0) / 2,2 + (camera.projection_matrix().w_axis.z - 1.0) / camera.projection_matrix().z_axis.z + } + (false, 1) => config.first_slice_depth().max(far_z), + _ => config.first_slice_depth(), + }; + let first_slice_depth = first_slice_depth * view_inv_scale.z; + + // NOTE: Ensure the far_z is at least as far as the first_depth_slice to avoid clustering problems. 
+ let far_z = far_z.max(first_slice_depth); + let cluster_factors = crate::calculate_cluster_factors( + first_slice_depth, + far_z, + requested_cluster_dimensions.z as f32, + is_orthographic, + ); + + if config.dynamic_resizing() { + let mut cluster_index_estimate = 0.0; + for light in &lights { + let light_sphere = light.sphere(); + + // Check if the light is within the view frustum + if !frustum.intersects_sphere(&light_sphere, true) { + continue; + } + + // calculate a conservative aabb estimate of number of clusters affected by this light + // this overestimates index counts by at most 50% (and typically much less) when the whole light range is in view + // it can overestimate more significantly when light ranges are only partially in view + let (light_aabb_min, light_aabb_max) = cluster_space_light_aabb( + inverse_view_transform, + view_inv_scale, + camera.projection_matrix(), + &light_sphere, + ); + + // since we won't adjust z slices we can calculate exact number of slices required in z dimension + let z_cluster_min = view_z_to_z_slice( + cluster_factors, + requested_cluster_dimensions.z, + light_aabb_min.z, + is_orthographic, + ); + let z_cluster_max = view_z_to_z_slice( + cluster_factors, + requested_cluster_dimensions.z, + light_aabb_max.z, + is_orthographic, + ); + let z_count = + z_cluster_min.max(z_cluster_max) - z_cluster_min.min(z_cluster_max) + 1; + + // calculate x/y count using floats to avoid overestimating counts due to large initial tile sizes + let xy_min = light_aabb_min.xy(); + let xy_max = light_aabb_max.xy(); + // multiply by 0.5 to move from [-1,1] to [-0.5, 0.5], max extent of 1 in each dimension + let xy_count = (xy_max - xy_min) + * 0.5 + * Vec2::new( + requested_cluster_dimensions.x as f32, + requested_cluster_dimensions.y as f32, + ); + + // add up to 2 to each axis to account for overlap + let x_overlap = if xy_min.x <= -1.0 { 0.0 } else { 1.0 } + + if xy_max.x >= 1.0 { 0.0 } else { 1.0 }; + let y_overlap = if xy_min.y <= -1.0 { 0.0 
} else { 1.0 } + + if xy_max.y >= 1.0 { 0.0 } else { 1.0 }; + cluster_index_estimate += + (xy_count.x + x_overlap) * (xy_count.y + y_overlap) * z_count as f32; + } + + if cluster_index_estimate > ViewClusterBindings::MAX_INDICES as f32 { + // scale x and y cluster count to be able to fit all our indices + + // we take the ratio of the actual indices over the index estimate. + // this is not guaranteed to be small enough due to overlapped tiles, but + // the conservative estimate is more than sufficient to cover the + // difference + let index_ratio = ViewClusterBindings::MAX_INDICES as f32 / cluster_index_estimate; + let xy_ratio = index_ratio.sqrt(); + + requested_cluster_dimensions.x = + ((requested_cluster_dimensions.x as f32 * xy_ratio).floor() as u32).max(1); + requested_cluster_dimensions.y = + ((requested_cluster_dimensions.y as f32 * xy_ratio).floor() as u32).max(1); + } + } + + clusters.update(screen_size, requested_cluster_dimensions); + clusters.near = first_slice_depth; + clusters.far = far_z; + + // NOTE: Maximum 4096 clusters due to uniform buffer size constraints + debug_assert!( + clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096 + ); + + let inverse_projection = camera.projection_matrix().inverse(); + + for lights in &mut clusters.lights { + lights.entities.clear(); + lights.point_light_count = 0; + lights.spot_light_count = 0; + } + let cluster_count = + (clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z) as usize; + clusters + .lights + .resize_with(cluster_count, VisiblePointLights::default); + + // initialize empty cluster bounding spheres + cluster_aabb_spheres.clear(); + cluster_aabb_spheres.extend(std::iter::repeat(None).take(cluster_count)); + + // Calculate the x/y/z cluster frustum planes in view space + let mut x_planes = Vec::with_capacity(clusters.dimensions.x as usize + 1); + let mut y_planes = Vec::with_capacity(clusters.dimensions.y as usize + 1); + let mut z_planes = 
Vec::with_capacity(clusters.dimensions.z as usize + 1); + + if is_orthographic { + let x_slices = clusters.dimensions.x as f32; + for x in 0..=clusters.dimensions.x { + let x_proportion = x as f32 / x_slices; + let x_pos = x_proportion * 2.0 - 1.0; + let view_x = clip_to_view(inverse_projection, Vec4::new(x_pos, 0.0, 1.0, 1.0)).x; + let normal = Vec3::X; + let d = view_x * normal.x; + x_planes.push(HalfSpace::new(normal.extend(d))); + } + + let y_slices = clusters.dimensions.y as f32; + for y in 0..=clusters.dimensions.y { + let y_proportion = 1.0 - y as f32 / y_slices; + let y_pos = y_proportion * 2.0 - 1.0; + let view_y = clip_to_view(inverse_projection, Vec4::new(0.0, y_pos, 1.0, 1.0)).y; + let normal = Vec3::Y; + let d = view_y * normal.y; + y_planes.push(HalfSpace::new(normal.extend(d))); + } + } else { + let x_slices = clusters.dimensions.x as f32; + for x in 0..=clusters.dimensions.x { + let x_proportion = x as f32 / x_slices; + let x_pos = x_proportion * 2.0 - 1.0; + let nb = clip_to_view(inverse_projection, Vec4::new(x_pos, -1.0, 1.0, 1.0)).xyz(); + let nt = clip_to_view(inverse_projection, Vec4::new(x_pos, 1.0, 1.0, 1.0)).xyz(); + let normal = nb.cross(nt); + let d = nb.dot(normal); + x_planes.push(HalfSpace::new(normal.extend(d))); + } + + let y_slices = clusters.dimensions.y as f32; + for y in 0..=clusters.dimensions.y { + let y_proportion = 1.0 - y as f32 / y_slices; + let y_pos = y_proportion * 2.0 - 1.0; + let nl = clip_to_view(inverse_projection, Vec4::new(-1.0, y_pos, 1.0, 1.0)).xyz(); + let nr = clip_to_view(inverse_projection, Vec4::new(1.0, y_pos, 1.0, 1.0)).xyz(); + let normal = nr.cross(nl); + let d = nr.dot(normal); + y_planes.push(HalfSpace::new(normal.extend(d))); + } + } + + let z_slices = clusters.dimensions.z; + for z in 0..=z_slices { + let view_z = z_slice_to_view_z(first_slice_depth, far_z, z_slices, z, is_orthographic); + let normal = -Vec3::Z; + let d = view_z * normal.z; + z_planes.push(HalfSpace::new(normal.extend(d))); + } + + 
let mut update_from_light_intersections = |visible_lights: &mut Vec| { + for light in &lights { + // check if the light layers overlap the view layers + if !view_layers.intersects(&light.render_layers) { + continue; + } + + let light_sphere = light.sphere(); + + // Check if the light is within the view frustum + if !frustum.intersects_sphere(&light_sphere, true) { + continue; + } + + // NOTE: The light intersects the frustum so it must be visible and part of the global set + global_lights.entities.insert(light.entity); + visible_lights.push(light.entity); + + // note: caching seems to be slower than calling twice for this aabb calculation + let (light_aabb_xy_ndc_z_view_min, light_aabb_xy_ndc_z_view_max) = + cluster_space_light_aabb( + inverse_view_transform, + view_inv_scale, + camera.projection_matrix(), + &light_sphere, + ); + + let min_cluster = ndc_position_to_cluster( + clusters.dimensions, + cluster_factors, + is_orthographic, + light_aabb_xy_ndc_z_view_min, + light_aabb_xy_ndc_z_view_min.z, + ); + let max_cluster = ndc_position_to_cluster( + clusters.dimensions, + cluster_factors, + is_orthographic, + light_aabb_xy_ndc_z_view_max, + light_aabb_xy_ndc_z_view_max.z, + ); + let (min_cluster, max_cluster) = + (min_cluster.min(max_cluster), min_cluster.max(max_cluster)); + + // What follows is the Iterative Sphere Refinement algorithm from Just Cause 3 + // Persson et al, Practical Clustered Shading + // http://newq.net/dl/pub/s2015_practical.pdf + // NOTE: A sphere under perspective projection is no longer a sphere. It gets + // stretched and warped, which prevents simpler algorithms from being correct + // as they often assume that the widest part of the sphere under projection is the + // center point on the axis of interest plus the radius, and that is not true! 
+ let view_light_sphere = Sphere { + center: Vec3A::from(inverse_view_transform * light_sphere.center.extend(1.0)), + radius: light_sphere.radius * view_inv_scale_max, + }; + let spot_light_dir_sin_cos = light.spot_light_angle.map(|angle| { + let (angle_sin, angle_cos) = angle.sin_cos(); + ( + (inverse_view_transform * light.transform.back().extend(0.0)) + .truncate() + .normalize(), + angle_sin, + angle_cos, + ) + }); + let light_center_clip = + camera.projection_matrix() * view_light_sphere.center.extend(1.0); + let light_center_ndc = light_center_clip.xyz() / light_center_clip.w; + let cluster_coordinates = ndc_position_to_cluster( + clusters.dimensions, + cluster_factors, + is_orthographic, + light_center_ndc, + view_light_sphere.center.z, + ); + let z_center = if light_center_ndc.z <= 1.0 { + Some(cluster_coordinates.z) + } else { + None + }; + let y_center = if light_center_ndc.y > 1.0 { + None + } else if light_center_ndc.y < -1.0 { + Some(clusters.dimensions.y + 1) + } else { + Some(cluster_coordinates.y) + }; + for z in min_cluster.z..=max_cluster.z { + let mut z_light = view_light_sphere.clone(); + if z_center.is_none() || z != z_center.unwrap() { + // The z plane closer to the light has the larger radius circle where the + // light sphere intersects the z plane. + let z_plane = if z_center.is_some() && z < z_center.unwrap() { + z_planes[(z + 1) as usize] + } else { + z_planes[z as usize] + }; + // Project the sphere to this z plane and use its radius as the radius of a + // new, refined sphere. + if let Some(projected) = project_to_plane_z(z_light, z_plane) { + z_light = projected; + } else { + continue; + } + } + for y in min_cluster.y..=max_cluster.y { + let mut y_light = z_light.clone(); + if y_center.is_none() || y != y_center.unwrap() { + // The y plane closer to the light has the larger radius circle where the + // light sphere intersects the y plane. 
+ let y_plane = if y_center.is_some() && y < y_center.unwrap() { + y_planes[(y + 1) as usize] + } else { + y_planes[y as usize] + }; + // Project the refined sphere to this y plane and use its radius as the + // radius of a new, even more refined sphere. + if let Some(projected) = + project_to_plane_y(y_light, y_plane, is_orthographic) + { + y_light = projected; + } else { + continue; + } + } + // Loop from the left to find the first affected cluster + let mut min_x = min_cluster.x; + loop { + if min_x >= max_cluster.x + || -get_distance_x( + x_planes[(min_x + 1) as usize], + y_light.center, + is_orthographic, + ) + y_light.radius + > 0.0 + { + break; + } + min_x += 1; + } + // Loop from the right to find the last affected cluster + let mut max_x = max_cluster.x; + loop { + if max_x <= min_x + || get_distance_x( + x_planes[max_x as usize], + y_light.center, + is_orthographic, + ) + y_light.radius + > 0.0 + { + break; + } + max_x -= 1; + } + let mut cluster_index = ((y * clusters.dimensions.x + min_x) + * clusters.dimensions.z + + z) as usize; + + if let Some((view_light_direction, angle_sin, angle_cos)) = + spot_light_dir_sin_cos + { + for x in min_x..=max_x { + // further culling for spot lights + // get or initialize cluster bounding sphere + let cluster_aabb_sphere = &mut cluster_aabb_spheres[cluster_index]; + let cluster_aabb_sphere = if let Some(sphere) = cluster_aabb_sphere + { + &*sphere + } else { + let aabb = compute_aabb_for_cluster( + first_slice_depth, + far_z, + clusters.tile_size.as_vec2(), + screen_size.as_vec2(), + inverse_projection, + is_orthographic, + clusters.dimensions, + UVec3::new(x, y, z), + ); + let sphere = Sphere { + center: aabb.center, + radius: aabb.half_extents.length(), + }; + *cluster_aabb_sphere = Some(sphere); + cluster_aabb_sphere.as_ref().unwrap() + }; + + // test -- based on https://bartwronski.com/2017/04/13/cull-that-cone/ + let spot_light_offset = Vec3::from( + view_light_sphere.center - cluster_aabb_sphere.center, + ); + 
let spot_light_dist_sq = spot_light_offset.length_squared(); + let v1_len = spot_light_offset.dot(view_light_direction); + + let distance_closest_point = (angle_cos + * (spot_light_dist_sq - v1_len * v1_len).sqrt()) + - v1_len * angle_sin; + let angle_cull = + distance_closest_point > cluster_aabb_sphere.radius; + + let front_cull = v1_len + > cluster_aabb_sphere.radius + light.range * view_inv_scale_max; + let back_cull = v1_len < -cluster_aabb_sphere.radius; + + if !angle_cull && !front_cull && !back_cull { + // this cluster is affected by the spot light + clusters.lights[cluster_index].entities.push(light.entity); + clusters.lights[cluster_index].spot_light_count += 1; + } + cluster_index += clusters.dimensions.z as usize; + } + } else { + for _ in min_x..=max_x { + // all clusters within range are affected by point lights + clusters.lights[cluster_index].entities.push(light.entity); + clusters.lights[cluster_index].point_light_count += 1; + cluster_index += clusters.dimensions.z as usize; + } + } + } + } + } + }; + + // reuse existing visible lights Vec, if it exists + if let Some(visible_lights) = visible_lights.as_mut() { + visible_lights.entities.clear(); + update_from_light_intersections(&mut visible_lights.entities); + } else { + let mut entities = Vec::new(); + update_from_light_intersections(&mut entities); + commands.entity(view_entity).insert(VisiblePointLights { + entities, + ..Default::default() + }); + } + } +} + +#[allow(clippy::too_many_arguments)] +fn compute_aabb_for_cluster( + z_near: f32, + z_far: f32, + tile_size: Vec2, + screen_size: Vec2, + inverse_projection: Mat4, + is_orthographic: bool, + cluster_dimensions: UVec3, + ijk: UVec3, +) -> Aabb { + let ijk = ijk.as_vec3(); + + // Calculate the minimum and maximum points in screen space + let p_min = ijk.xy() * tile_size; + let p_max = p_min + tile_size; + + let cluster_min; + let cluster_max; + if is_orthographic { + // Use linear depth slicing for orthographic + + // Convert to view space 
at the cluster near and far planes + // NOTE: 1.0 is the near plane due to using reverse z projections + let mut p_min = screen_to_view(screen_size, inverse_projection, p_min, 0.0).xyz(); + let mut p_max = screen_to_view(screen_size, inverse_projection, p_max, 0.0).xyz(); + + // calculate cluster depth using z_near and z_far + p_min.z = -z_near + (z_near - z_far) * ijk.z / cluster_dimensions.z as f32; + p_max.z = -z_near + (z_near - z_far) * (ijk.z + 1.0) / cluster_dimensions.z as f32; + + cluster_min = p_min.min(p_max); + cluster_max = p_min.max(p_max); + } else { + // Convert to view space at the near plane + // NOTE: 1.0 is the near plane due to using reverse z projections + let p_min = screen_to_view(screen_size, inverse_projection, p_min, 1.0); + let p_max = screen_to_view(screen_size, inverse_projection, p_max, 1.0); + + let z_far_over_z_near = -z_far / -z_near; + let cluster_near = if ijk.z == 0.0 { + 0.0 + } else { + -z_near * z_far_over_z_near.powf((ijk.z - 1.0) / (cluster_dimensions.z - 1) as f32) + }; + // NOTE: This could be simplified to: + // cluster_far = cluster_near * z_far_over_z_near; + let cluster_far = if cluster_dimensions.z == 1 { + -z_far + } else { + -z_near * z_far_over_z_near.powf(ijk.z / (cluster_dimensions.z - 1) as f32) + }; + + // Calculate the four intersection points of the min and max points with the cluster near and far planes + let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near); + let p_min_far = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_far); + let p_max_near = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_near); + let p_max_far = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_far); + + cluster_min = p_min_near.min(p_min_far).min(p_max_near.min(p_max_far)); + cluster_max = p_min_near.max(p_min_far).max(p_max_near.max(p_max_far)); + } + + Aabb::from_min_max(cluster_min, cluster_max) +} + +// NOTE: Keep in sync as the inverse of view_z_to_z_slice 
/// Returns the view-space z (zero or negative, in front of the camera) of the boundary plane
/// with index `z_slice`, where `z_slice` ranges over `0..=z_slices`.
///
/// * Orthographic: planes are spaced linearly from `-near` (plane 0) to `-far` (last plane).
/// * Perspective: plane 0 sits at the camera (`0.0`), plane 1 at `-near`, and the remaining
///   planes are spaced exponentially up to `-far`.
///
/// NOTE: Keep in sync as the inverse of `view_z_to_z_slice`.
fn z_slice_to_view_z(
    near: f32,
    far: f32,
    z_slices: u32,
    z_slice: u32,
    is_orthographic: bool,
) -> f32 {
    if is_orthographic {
        return -near - (far - near) * z_slice as f32 / z_slices as f32;
    }

    // Perspective
    if z_slice == 0 {
        return 0.0;
    }
    if z_slices <= 1 {
        // With a single slice the exponent below would be 0/0 = NaN, which only produced a
        // usable plane when `far == near`. The single slice ends at the far plane, matching
        // the `cluster_dimensions.z == 1` case in `compute_aabb_for_cluster`.
        return -far;
    }
    -near * (far / near).powf((z_slice - 1) as f32 / (z_slices - 1) as f32)
}
This breaks assumptions about + // use of min/max operations as something that was to the left in view space is now returning a + // coordinate that for view z in front of the camera would be on the right, but at view z behind the + // camera is on the left. So, we just constrain view z to be < 0.0 and necessarily in front of the camera. + light_aabb_view_min.z = light_aabb_view_min.z.min(-f32::MIN_POSITIVE); + light_aabb_view_max.z = light_aabb_view_max.z.min(-f32::MIN_POSITIVE); + + // Is there a cheaper way to do this? The problem is that because of perspective + // the point at max z but min xy may be less xy in screenspace, and similar. As + // such, projecting the min and max xy at both the closer and further z and taking + // the min and max of those projected points addresses this. + let ( + light_aabb_view_xymin_near, + light_aabb_view_xymin_far, + light_aabb_view_xymax_near, + light_aabb_view_xymax_far, + ) = ( + light_aabb_view_min, + light_aabb_view_min.xy().extend(light_aabb_view_max.z), + light_aabb_view_max.xy().extend(light_aabb_view_min.z), + light_aabb_view_max, + ); + let ( + light_aabb_clip_xymin_near, + light_aabb_clip_xymin_far, + light_aabb_clip_xymax_near, + light_aabb_clip_xymax_far, + ) = ( + projection_matrix * light_aabb_view_xymin_near.extend(1.0), + projection_matrix * light_aabb_view_xymin_far.extend(1.0), + projection_matrix * light_aabb_view_xymax_near.extend(1.0), + projection_matrix * light_aabb_view_xymax_far.extend(1.0), + ); + let ( + light_aabb_ndc_xymin_near, + light_aabb_ndc_xymin_far, + light_aabb_ndc_xymax_near, + light_aabb_ndc_xymax_far, + ) = ( + light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w, + light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w, + light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w, + light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w, + ); + let (light_aabb_ndc_min, light_aabb_ndc_max) = ( + light_aabb_ndc_xymin_near + .min(light_aabb_ndc_xymin_far) + 
.min(light_aabb_ndc_xymax_near) + .min(light_aabb_ndc_xymax_far), + light_aabb_ndc_xymin_near + .max(light_aabb_ndc_xymin_far) + .max(light_aabb_ndc_xymax_near) + .max(light_aabb_ndc_xymax_far), + ); + + // clamp to ndc coords without depth + let (aabb_min_ndc, aabb_max_ndc) = ( + light_aabb_ndc_min.xy().clamp(NDC_MIN, NDC_MAX), + light_aabb_ndc_max.xy().clamp(NDC_MIN, NDC_MAX), + ); + + // pack unadjusted z depth into the vecs + ( + aabb_min_ndc.extend(light_aabb_view_min.z), + aabb_max_ndc.extend(light_aabb_view_max.z), + ) +} + +// Calculate the intersection of a ray from the eye through the view space position to a z plane +fn line_intersection_to_z_plane(origin: Vec3, p: Vec3, z: f32) -> Vec3 { + let v = p - origin; + let t = (z - Vec3::Z.dot(origin)) / Vec3::Z.dot(v); + origin + t * v +} + +// NOTE: Keep in sync with bevy_pbr/src/render/pbr.wgsl +fn view_z_to_z_slice( + cluster_factors: Vec2, + z_slices: u32, + view_z: f32, + is_orthographic: bool, +) -> u32 { + let z_slice = if is_orthographic { + // NOTE: view_z is correct in the orthographic case + ((view_z - cluster_factors.x) * cluster_factors.y).floor() as u32 + } else { + // NOTE: had to use -view_z to make it positive else log(negative) is nan + ((-view_z).ln() * cluster_factors.x - cluster_factors.y + 1.0) as u32 + }; + // NOTE: We use min as we may limit the far z plane used for clustering to be closer than + // the furthest thing being drawn. This means that we need to limit to the maximum cluster. 
+ z_slice.min(z_slices - 1) +} + +fn clip_to_view(inverse_projection: Mat4, clip: Vec4) -> Vec4 { + let view = inverse_projection * clip; + view / view.w +} + +fn screen_to_view(screen_size: Vec2, inverse_projection: Mat4, screen: Vec2, ndc_z: f32) -> Vec4 { + let tex_coord = screen / screen_size; + let clip = Vec4::new( + tex_coord.x * 2.0 - 1.0, + (1.0 - tex_coord.y) * 2.0 - 1.0, + ndc_z, + 1.0, + ); + clip_to_view(inverse_projection, clip) +} + +// NOTE: This exploits the fact that a x-plane normal has only x and z components +fn get_distance_x(plane: HalfSpace, point: Vec3A, is_orthographic: bool) -> f32 { + if is_orthographic { + point.x - plane.d() + } else { + // Distance from a point to a plane: + // signed distance to plane = (nx * px + ny * py + nz * pz + d) / n.length() + // NOTE: For a x-plane, ny and d are 0 and we have a unit normal + // = nx * px + nz * pz + plane.normal_d().xz().dot(point.xz()) + } +} + +// NOTE: This exploits the fact that a z-plane normal has only a z component +fn project_to_plane_z(z_light: Sphere, z_plane: HalfSpace) -> Option { + // p = sphere center + // n = plane normal + // d = n.p if p is in the plane + // NOTE: For a z-plane, nx and ny are both 0 + // d = px * nx + py * ny + pz * nz + // = pz * nz + // => pz = d / nz + let z = z_plane.d() / z_plane.normal_d().z; + let distance_to_plane = z - z_light.center.z; + if distance_to_plane.abs() > z_light.radius { + return None; + } + Some(Sphere { + center: Vec3A::from(z_light.center.xy().extend(z)), + // hypotenuse length = radius + // pythagoras = (distance to plane)^2 + b^2 = radius^2 + radius: (z_light.radius * z_light.radius - distance_to_plane * distance_to_plane).sqrt(), + }) +} + +// NOTE: This exploits the fact that a y-plane normal has only y and z components +fn project_to_plane_y( + y_light: Sphere, + y_plane: HalfSpace, + is_orthographic: bool, +) -> Option { + let distance_to_plane = if is_orthographic { + y_plane.d() - y_light.center.y + } else { + 
-y_light.center.yz().dot(y_plane.normal_d().yz()) + }; + + if distance_to_plane.abs() > y_light.radius { + return None; + } + Some(Sphere { + center: y_light.center + distance_to_plane * y_plane.normal(), + radius: (y_light.radius * y_light.radius - distance_to_plane * distance_to_plane).sqrt(), + }) +} diff --git a/crates/bevy_pbr/src/cluster/mod.rs b/crates/bevy_pbr/src/cluster/mod.rs new file mode 100644 index 0000000000..a8c218b447 --- /dev/null +++ b/crates/bevy_pbr/src/cluster/mod.rs @@ -0,0 +1,803 @@ +//! Spatial clustering of objects, currently just point and spot lights. + +use std::num::NonZeroU64; + +use bevy_ecs::{ + component::Component, + entity::{Entity, EntityHashMap}, + query::Without, + reflect::ReflectComponent, + system::{Commands, Query, Res, Resource}, + world::{FromWorld, World}, +}; +use bevy_math::{AspectRatio, UVec2, UVec3, UVec4, Vec3Swizzles as _, Vec4}; +use bevy_reflect::{std_traits::ReflectDefault, Reflect}; +use bevy_render::{ + camera::Camera, + render_resource::{ + BindingResource, BufferBindingType, ShaderSize as _, ShaderType, StorageBuffer, + UniformBuffer, + }, + renderer::{RenderDevice, RenderQueue}, + Extract, +}; +use bevy_utils::{hashbrown::HashSet, tracing::warn}; + +pub(crate) use crate::cluster::assign::assign_lights_to_clusters; +use crate::MeshPipeline; + +mod assign; + +#[cfg(test)] +mod test; + +// NOTE: this must be kept in sync with the same constants in pbr.frag +pub const MAX_UNIFORM_BUFFER_POINT_LIGHTS: usize = 256; + +// NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that +// at least that many are supported using this constant and SupportedBindingType::from_device() +pub const CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT: u32 = 3; + +// this must match CLUSTER_COUNT_SIZE in pbr.wgsl +// and must be large enough to contain MAX_UNIFORM_BUFFER_POINT_LIGHTS +const CLUSTER_COUNT_SIZE: u32 = 9; + +const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - (CLUSTER_COUNT_SIZE * 2))) - 1; +const 
CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1; + +// Clustered-forward rendering notes +// The main initial reference material used was this rather accessible article: +// http://www.aortiz.me/2018/12/21/CG.html +// Some inspiration was taken from “Practical Clustered Shading” which is part 2 of: +// https://efficientshading.com/2015/01/01/real-time-many-light-management-and-shadows-with-clustered-shading/ +// (Also note that Part 3 of the above shows how we could support the shadow mapping for many lights.) +// The z-slicing method mentioned in the aortiz article is originally from Tiago Sousa's Siggraph 2016 talk about Doom 2016: +// http://advances.realtimerendering.com/s2016/Siggraph2016_idTech6.pdf + +/// Configure the far z-plane mode used for the furthest depth slice for clustered forward +/// rendering +#[derive(Debug, Copy, Clone, Reflect)] +pub enum ClusterFarZMode { + /// Calculate the required maximum z-depth based on currently visible lights. + /// Makes better use of available clusters, speeding up GPU lighting operations + /// at the expense of some CPU time and using more indices in the cluster light + /// index lists. + MaxLightRange, + /// Constant max z-depth + Constant(f32), +} + +/// Configure the depth-slicing strategy for clustered forward rendering +#[derive(Debug, Copy, Clone, Reflect)] +#[reflect(Default)] +pub struct ClusterZConfig { + /// Far `Z` plane of the first depth slice + pub first_slice_depth: f32, + /// Strategy for how to evaluate the far `Z` plane of the furthest depth slice + pub far_z_mode: ClusterFarZMode, +} + +/// Configuration of the clustering strategy for clustered forward rendering +#[derive(Debug, Copy, Clone, Component, Reflect)] +#[reflect(Component)] +pub enum ClusterConfig { + /// Disable light cluster calculations for this view + None, + /// One single cluster. Optimal for low-light complexity scenes or scenes where + /// most lights affect the entire scene. 
+ Single, + /// Explicit `X`, `Y` and `Z` counts (may yield non-square `X/Y` clusters depending on the aspect ratio) + XYZ { + dimensions: UVec3, + z_config: ClusterZConfig, + /// Specify if clusters should automatically resize in `X/Y` if there is a risk of exceeding + /// the available cluster-light index limit + dynamic_resizing: bool, + }, + /// Fixed number of `Z` slices, `X` and `Y` calculated to give square clusters + /// with at most `total` clusters. For top-down games where lights will generally always be within a + /// short depth range, it may be useful to use this configuration with 1 or few `Z` slices. This + /// would reduce the number of lights per cluster by distributing more clusters in screen space + /// `X/Y` which matches how lights are distributed in the scene. + FixedZ { + total: u32, + z_slices: u32, + z_config: ClusterZConfig, + /// Specify if clusters should automatically resize in `X/Y` if there is a risk of exceeding + /// the available cluster-light index limit + dynamic_resizing: bool, + }, +} + +#[derive(Component, Debug, Default)] +pub struct Clusters { + /// Tile size + pub(crate) tile_size: UVec2, + /// Number of clusters in `X` / `Y` / `Z` in the view frustum + pub(crate) dimensions: UVec3, + /// Distance to the far plane of the first depth slice. The first depth slice is special + /// and explicitly-configured to avoid having unnecessarily many slices close to the camera. 
+ pub(crate) near: f32, + pub(crate) far: f32, + pub(crate) lights: Vec, +} + +#[derive(Clone, Component, Debug, Default)] +pub struct VisiblePointLights { + pub(crate) entities: Vec, + pub point_light_count: usize, + pub spot_light_count: usize, +} + +#[derive(Resource, Default)] +pub struct GlobalVisiblePointLights { + pub(crate) entities: HashSet, +} + +#[derive(Resource)] +pub struct GlobalLightMeta { + pub gpu_point_lights: GpuPointLights, + pub entity_to_index: EntityHashMap, +} + +#[derive(Copy, Clone, ShaderType, Default, Debug)] +pub struct GpuPointLight { + // For point lights: the lower-right 2x2 values of the projection matrix [2][2] [2][3] [3][2] [3][3] + // For spot lights: 2 components of the direction (x,z), spot_scale and spot_offset + pub(crate) light_custom_data: Vec4, + pub(crate) color_inverse_square_range: Vec4, + pub(crate) position_radius: Vec4, + pub(crate) flags: u32, + pub(crate) shadow_depth_bias: f32, + pub(crate) shadow_normal_bias: f32, + pub(crate) spot_light_tan_angle: f32, +} + +pub enum GpuPointLights { + Uniform(UniformBuffer), + Storage(StorageBuffer), +} + +#[derive(ShaderType)] +pub struct GpuPointLightsUniform { + data: Box<[GpuPointLight; MAX_UNIFORM_BUFFER_POINT_LIGHTS]>, +} + +#[derive(ShaderType, Default)] +pub struct GpuPointLightsStorage { + #[size(runtime)] + data: Vec, +} + +#[derive(Component)] +pub struct ExtractedClusterConfig { + /// Special near value for cluster calculations + pub(crate) near: f32, + pub(crate) far: f32, + /// Number of clusters in `X` / `Y` / `Z` in the view frustum + pub(crate) dimensions: UVec3, +} + +enum ExtractedClustersPointLightsElement { + ClusterHeader(u32, u32), + LightEntity(Entity), +} + +#[derive(Component)] +pub struct ExtractedClustersPointLights { + data: Vec, +} + +#[derive(ShaderType)] +struct GpuClusterOffsetsAndCountsUniform { + data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, +} + +#[derive(ShaderType, Default)] +struct GpuClusterLightIndexListsStorage { + 
#[size(runtime)] + data: Vec, +} + +#[derive(ShaderType, Default)] +struct GpuClusterOffsetsAndCountsStorage { + #[size(runtime)] + data: Vec, +} + +enum ViewClusterBuffers { + Uniform { + // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment + cluster_light_index_lists: UniformBuffer, + // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment + cluster_offsets_and_counts: UniformBuffer, + }, + Storage { + cluster_light_index_lists: StorageBuffer, + cluster_offsets_and_counts: StorageBuffer, + }, +} + +#[derive(Component)] +pub struct ViewClusterBindings { + n_indices: usize, + n_offsets: usize, + buffers: ViewClusterBuffers, +} + +impl Default for ClusterZConfig { + fn default() -> Self { + Self { + first_slice_depth: 5.0, + far_z_mode: ClusterFarZMode::MaxLightRange, + } + } +} + +impl Default for ClusterConfig { + fn default() -> Self { + // 24 depth slices, square clusters with at most 4096 total clusters + // use max light distance as clusters max `Z`-depth, first slice extends to 5.0 + Self::FixedZ { + total: 4096, + z_slices: 24, + z_config: ClusterZConfig::default(), + dynamic_resizing: true, + } + } +} + +impl ClusterConfig { + fn dimensions_for_screen_size(&self, screen_size: UVec2) -> UVec3 { + match &self { + ClusterConfig::None => UVec3::ZERO, + ClusterConfig::Single => UVec3::ONE, + ClusterConfig::XYZ { dimensions, .. } => *dimensions, + ClusterConfig::FixedZ { + total, z_slices, .. 
+ } => { + let aspect_ratio: f32 = + AspectRatio::from_pixels(screen_size.x, screen_size.y).into(); + let mut z_slices = *z_slices; + if *total < z_slices { + warn!("ClusterConfig has more z-slices than total clusters!"); + z_slices = *total; + } + let per_layer = *total as f32 / z_slices as f32; + + let y = f32::sqrt(per_layer / aspect_ratio); + + let mut x = (y * aspect_ratio) as u32; + let mut y = y as u32; + + // check extremes + if x == 0 { + x = 1; + y = per_layer as u32; + } + if y == 0 { + x = per_layer as u32; + y = 1; + } + + UVec3::new(x, y, z_slices) + } + } + } + + fn first_slice_depth(&self) -> f32 { + match self { + ClusterConfig::None | ClusterConfig::Single => 0.0, + ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => { + z_config.first_slice_depth + } + } + } + + fn far_z_mode(&self) -> ClusterFarZMode { + match self { + ClusterConfig::None => ClusterFarZMode::Constant(0.0), + ClusterConfig::Single => ClusterFarZMode::MaxLightRange, + ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => { + z_config.far_z_mode + } + } + } + + fn dynamic_resizing(&self) -> bool { + match self { + ClusterConfig::None | ClusterConfig::Single => false, + ClusterConfig::XYZ { + dynamic_resizing, .. + } + | ClusterConfig::FixedZ { + dynamic_resizing, .. 
+ } => *dynamic_resizing, + } + } +} + +impl Clusters { + fn update(&mut self, screen_size: UVec2, requested_dimensions: UVec3) { + debug_assert!( + requested_dimensions.x > 0 && requested_dimensions.y > 0 && requested_dimensions.z > 0 + ); + + let tile_size = (screen_size.as_vec2() / requested_dimensions.xy().as_vec2()) + .ceil() + .as_uvec2() + .max(UVec2::ONE); + self.tile_size = tile_size; + self.dimensions = (screen_size.as_vec2() / tile_size.as_vec2()) + .ceil() + .as_uvec2() + .extend(requested_dimensions.z) + .max(UVec3::ONE); + + // NOTE: Maximum 4096 clusters due to uniform buffer size constraints + debug_assert!(self.dimensions.x * self.dimensions.y * self.dimensions.z <= 4096); + } + fn clear(&mut self) { + self.tile_size = UVec2::ONE; + self.dimensions = UVec3::ZERO; + self.near = 0.0; + self.far = 0.0; + self.lights.clear(); + } +} + +pub fn add_clusters( + mut commands: Commands, + cameras: Query<(Entity, Option<&ClusterConfig>, &Camera), Without>, +) { + for (entity, config, camera) in &cameras { + if !camera.is_active { + continue; + } + + let config = config.copied().unwrap_or_default(); + // actual settings here don't matter - they will be overwritten in assign_lights_to_clusters + commands + .entity(entity) + .insert((Clusters::default(), config)); + } +} + +impl VisiblePointLights { + #[inline] + pub fn iter(&self) -> impl DoubleEndedIterator { + self.entities.iter() + } + + #[inline] + pub fn len(&self) -> usize { + self.entities.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.entities.is_empty() + } +} + +impl GlobalVisiblePointLights { + #[inline] + pub fn iter(&self) -> impl Iterator { + self.entities.iter() + } + + #[inline] + pub fn contains(&self, entity: Entity) -> bool { + self.entities.contains(&entity) + } +} + +impl FromWorld for GlobalLightMeta { + fn from_world(world: &mut World) -> Self { + Self::new( + world + .resource::() + .get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT), + ) + } 
+} + +impl GlobalLightMeta { + pub fn new(buffer_binding_type: BufferBindingType) -> Self { + Self { + gpu_point_lights: GpuPointLights::new(buffer_binding_type), + entity_to_index: EntityHashMap::default(), + } + } +} + +impl GpuPointLights { + fn new(buffer_binding_type: BufferBindingType) -> Self { + match buffer_binding_type { + BufferBindingType::Storage { .. } => Self::storage(), + BufferBindingType::Uniform => Self::uniform(), + } + } + + fn uniform() -> Self { + Self::Uniform(UniformBuffer::default()) + } + + fn storage() -> Self { + Self::Storage(StorageBuffer::default()) + } + + pub(crate) fn set(&mut self, mut lights: Vec) { + match self { + GpuPointLights::Uniform(buffer) => { + let len = lights.len().min(MAX_UNIFORM_BUFFER_POINT_LIGHTS); + let src = &lights[..len]; + let dst = &mut buffer.get_mut().data[..len]; + dst.copy_from_slice(src); + } + GpuPointLights::Storage(buffer) => { + buffer.get_mut().data.clear(); + buffer.get_mut().data.append(&mut lights); + } + } + } + + pub(crate) fn write_buffer( + &mut self, + render_device: &RenderDevice, + render_queue: &RenderQueue, + ) { + match self { + GpuPointLights::Uniform(buffer) => buffer.write_buffer(render_device, render_queue), + GpuPointLights::Storage(buffer) => buffer.write_buffer(render_device, render_queue), + } + } + + pub fn binding(&self) -> Option { + match self { + GpuPointLights::Uniform(buffer) => buffer.binding(), + GpuPointLights::Storage(buffer) => buffer.binding(), + } + } + + pub fn min_size(buffer_binding_type: BufferBindingType) -> NonZeroU64 { + match buffer_binding_type { + BufferBindingType::Storage { .. 
} => GpuPointLightsStorage::min_size(), + BufferBindingType::Uniform => GpuPointLightsUniform::min_size(), + } + } +} + +impl Default for GpuPointLightsUniform { + fn default() -> Self { + Self { + data: Box::new([GpuPointLight::default(); MAX_UNIFORM_BUFFER_POINT_LIGHTS]), + } + } +} + +#[allow(clippy::too_many_arguments)] +// Sort lights by +// - point-light vs spot-light, so that we can iterate point lights and spot lights in contiguous blocks in the fragment shader, +// - then those with shadows enabled first, so that the index can be used to render at most `point_light_shadow_maps_count` +// point light shadows and `spot_light_shadow_maps_count` spot light shadow maps, +// - then by entity as a stable key to ensure that a consistent set of lights are chosen if the light count limit is exceeded. +pub(crate) fn point_light_order( + (entity_1, shadows_enabled_1, is_spot_light_1): (&Entity, &bool, &bool), + (entity_2, shadows_enabled_2, is_spot_light_2): (&Entity, &bool, &bool), +) -> std::cmp::Ordering { + is_spot_light_1 + .cmp(is_spot_light_2) // pointlights before spot lights + .then_with(|| shadows_enabled_2.cmp(shadows_enabled_1)) // shadow casters before non-casters + .then_with(|| entity_1.cmp(entity_2)) // stable +} + +/// Extracts clusters from the main world into the render world. 
+pub fn extract_clusters( + mut commands: Commands, + views: Extract>, +) { + for (entity, clusters, camera) in &views { + if !camera.is_active { + continue; + } + + let num_entities: usize = clusters.lights.iter().map(|l| l.entities.len()).sum(); + let mut data = Vec::with_capacity(clusters.lights.len() + num_entities); + for cluster_lights in &clusters.lights { + data.push(ExtractedClustersPointLightsElement::ClusterHeader( + cluster_lights.point_light_count as u32, + cluster_lights.spot_light_count as u32, + )); + for l in &cluster_lights.entities { + data.push(ExtractedClustersPointLightsElement::LightEntity(*l)); + } + } + + commands.get_or_spawn(entity).insert(( + ExtractedClustersPointLights { data }, + ExtractedClusterConfig { + near: clusters.near, + far: clusters.far, + dimensions: clusters.dimensions, + }, + )); + } +} + +pub fn prepare_clusters( + mut commands: Commands, + render_device: Res, + render_queue: Res, + mesh_pipeline: Res, + global_light_meta: Res, + views: Query<(Entity, &ExtractedClustersPointLights)>, +) { + let render_device = render_device.into_inner(); + let supports_storage_buffers = matches!( + mesh_pipeline.clustered_forward_buffer_binding_type, + BufferBindingType::Storage { .. 
} + ); + for (entity, extracted_clusters) in &views { + let mut view_clusters_bindings = + ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type); + view_clusters_bindings.clear(); + + for record in &extracted_clusters.data { + match record { + ExtractedClustersPointLightsElement::ClusterHeader( + point_light_count, + spot_light_count, + ) => { + let offset = view_clusters_bindings.n_indices(); + view_clusters_bindings.push_offset_and_counts( + offset, + *point_light_count as usize, + *spot_light_count as usize, + ); + } + ExtractedClustersPointLightsElement::LightEntity(entity) => { + if let Some(light_index) = global_light_meta.entity_to_index.get(entity) { + if view_clusters_bindings.n_indices() >= ViewClusterBindings::MAX_INDICES + && !supports_storage_buffers + { + warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters."); + break; + } + view_clusters_bindings.push_index(*light_index); + } + } + } + } + + view_clusters_bindings.write_buffers(render_device, &render_queue); + + commands.get_or_spawn(entity).insert(view_clusters_bindings); + } +} + +impl ViewClusterBindings { + pub const MAX_OFFSETS: usize = 16384 / 4; + const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4; + pub const MAX_INDICES: usize = 16384; + + pub fn new(buffer_binding_type: BufferBindingType) -> Self { + Self { + n_indices: 0, + n_offsets: 0, + buffers: ViewClusterBuffers::new(buffer_binding_type), + } + } + + pub fn clear(&mut self) { + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + *cluster_light_index_lists.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]; + *cluster_offsets_and_counts.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]; + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + cluster_offsets_and_counts, + .. 
+ } => { + cluster_light_index_lists.get_mut().data.clear(); + cluster_offsets_and_counts.get_mut().data.clear(); + } + } + } + + pub fn push_offset_and_counts(&mut self, offset: usize, point_count: usize, spot_count: usize) { + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_offsets_and_counts, + .. + } => { + let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4 + if array_index >= Self::MAX_UNIFORM_ITEMS { + warn!("cluster offset and count out of bounds!"); + return; + } + let component = self.n_offsets & ((1 << 2) - 1); + let packed = pack_offset_and_counts(offset, point_count, spot_count); + + cluster_offsets_and_counts.get_mut().data[array_index][component] = packed; + } + ViewClusterBuffers::Storage { + cluster_offsets_and_counts, + .. + } => { + cluster_offsets_and_counts.get_mut().data.push(UVec4::new( + offset as u32, + point_count as u32, + spot_count as u32, + 0, + )); + } + } + + self.n_offsets += 1; + } + + pub fn n_indices(&self) -> usize { + self.n_indices + } + + pub fn push_index(&mut self, index: usize) { + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + .. + } => { + let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16 + let component = (self.n_indices >> 2) & ((1 << 2) - 1); + let sub_index = self.n_indices & ((1 << 2) - 1); + let index = index as u32; + + cluster_light_index_lists.get_mut().data[array_index][component] |= + index << (8 * sub_index); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + .. 
+ } => { + cluster_light_index_lists.get_mut().data.push(index as u32); + } + } + + self.n_indices += 1; + } + + pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.write_buffer(render_device, render_queue); + cluster_offsets_and_counts.write_buffer(render_device, render_queue); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.write_buffer(render_device, render_queue); + cluster_offsets_and_counts.write_buffer(render_device, render_queue); + } + } + } + + pub fn light_index_lists_binding(&self) -> Option { + match &self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + .. + } => cluster_light_index_lists.binding(), + ViewClusterBuffers::Storage { + cluster_light_index_lists, + .. + } => cluster_light_index_lists.binding(), + } + } + + pub fn offsets_and_counts_binding(&self) -> Option { + match &self.buffers { + ViewClusterBuffers::Uniform { + cluster_offsets_and_counts, + .. + } => cluster_offsets_and_counts.binding(), + ViewClusterBuffers::Storage { + cluster_offsets_and_counts, + .. + } => cluster_offsets_and_counts.binding(), + } + } + + pub fn min_size_cluster_light_index_lists( + buffer_binding_type: BufferBindingType, + ) -> NonZeroU64 { + match buffer_binding_type { + BufferBindingType::Storage { .. } => GpuClusterLightIndexListsStorage::min_size(), + BufferBindingType::Uniform => GpuClusterLightIndexListsUniform::min_size(), + } + } + + pub fn min_size_cluster_offsets_and_counts( + buffer_binding_type: BufferBindingType, + ) -> NonZeroU64 { + match buffer_binding_type { + BufferBindingType::Storage { .. 
} => GpuClusterOffsetsAndCountsStorage::min_size(), + BufferBindingType::Uniform => GpuClusterOffsetsAndCountsUniform::min_size(), + } + } +} + +impl ViewClusterBuffers { + fn new(buffer_binding_type: BufferBindingType) -> Self { + match buffer_binding_type { + BufferBindingType::Storage { .. } => Self::storage(), + BufferBindingType::Uniform => Self::uniform(), + } + } + + fn uniform() -> Self { + ViewClusterBuffers::Uniform { + cluster_light_index_lists: UniformBuffer::default(), + cluster_offsets_and_counts: UniformBuffer::default(), + } + } + + fn storage() -> Self { + ViewClusterBuffers::Storage { + cluster_light_index_lists: StorageBuffer::default(), + cluster_offsets_and_counts: StorageBuffer::default(), + } + } +} + +// NOTE: With uniform buffer max binding size as 16384 bytes +// that means we can fit 256 point lights in one uniform +// buffer, which means the count can be at most 256 so it +// needs 9 bits. +// The array of indices can also use u8 and that means the +// offset in to the array of indices needs to be able to address +// 16384 values. log2(16384) = 14 bits. +// We use 32 bits to store the offset and counts so +// we pack the offset into the upper 14 bits of a u32, +// the point light count into bits 9-17, and the spot light count into bits 0-8. +// [ 31 .. 18 | 17 .. 9 | 8 .. 
0 ] +// [ offset | point light count | spot light count ] +// NOTE: This assumes CPU and GPU endianness are the same which is true +// for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs +fn pack_offset_and_counts(offset: usize, point_count: usize, spot_count: usize) -> u32 { + ((offset as u32 & CLUSTER_OFFSET_MASK) << (CLUSTER_COUNT_SIZE * 2)) + | (point_count as u32 & CLUSTER_COUNT_MASK) << CLUSTER_COUNT_SIZE + | (spot_count as u32 & CLUSTER_COUNT_MASK) +} + +#[derive(ShaderType)] +struct GpuClusterLightIndexListsUniform { + data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, +} + +// NOTE: Assert at compile time that GpuClusterLightIndexListsUniform +// fits within the maximum uniform buffer binding size +const _: () = assert!(GpuClusterLightIndexListsUniform::SHADER_SIZE.get() <= 16384); + +impl Default for GpuClusterLightIndexListsUniform { + fn default() -> Self { + Self { + data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]), + } + } +} + +impl Default for GpuClusterOffsetsAndCountsUniform { + fn default() -> Self { + Self { + data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]), + } + } +} diff --git a/crates/bevy_pbr/src/cluster/test.rs b/crates/bevy_pbr/src/cluster/test.rs new file mode 100644 index 0000000000..23809da7f6 --- /dev/null +++ b/crates/bevy_pbr/src/cluster/test.rs @@ -0,0 +1,54 @@ +use bevy_math::UVec2; + +use crate::{ClusterConfig, Clusters}; + +fn test_cluster_tiling(config: ClusterConfig, screen_size: UVec2) -> Clusters { + let dims = config.dimensions_for_screen_size(screen_size); + + // note: near & far do not affect tiling + let mut clusters = Clusters::default(); + clusters.update(screen_size, dims); + + // check we cover the screen + assert!(clusters.tile_size.x * clusters.dimensions.x >= screen_size.x); + assert!(clusters.tile_size.y * clusters.dimensions.y >= screen_size.y); + // check a smaller number of clusters would not cover the screen + 
assert!(clusters.tile_size.x * (clusters.dimensions.x - 1) < screen_size.x); + assert!(clusters.tile_size.y * (clusters.dimensions.y - 1) < screen_size.y); + // check a smaller tile size would not cover the screen + assert!((clusters.tile_size.x - 1) * clusters.dimensions.x < screen_size.x); + assert!((clusters.tile_size.y - 1) * clusters.dimensions.y < screen_size.y); + // check we don't have more clusters than pixels + assert!(clusters.dimensions.x <= screen_size.x); + assert!(clusters.dimensions.y <= screen_size.y); + + clusters +} + +#[test] +// check tiling for small screen sizes +fn test_default_cluster_setup_small_screensizes() { + for x in 1..100 { + for y in 1..100 { + let screen_size = UVec2::new(x, y); + let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size); + assert!(clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096); + } + } +} + +#[test] +// check tiling for long thin screen sizes +fn test_default_cluster_setup_small_x() { + for x in 1..10 { + for y in 1..5000 { + let screen_size = UVec2::new(x, y); + let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size); + assert!(clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096); + + let screen_size = UVec2::new(y, x); + let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size); + assert!(clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096); + } + } +} diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 179e0e5e45..8d26a98c5b 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -22,6 +22,7 @@ pub mod experimental { } mod bundle; +mod cluster; pub mod deferred; mod extended_material; mod fog; @@ -41,6 +42,7 @@ use bevy_color::{Color, LinearRgba}; use std::marker::PhantomData; pub use bundle::*; +pub use cluster::*; pub use extended_material::*; pub use fog::*; pub use light::*; @@ -337,7 +339,7 @@ impl Plugin for PbrPlugin { 
PostUpdate, ( add_clusters.in_set(SimulationLightSystems::AddClusters), - assign_lights_to_clusters + crate::assign_lights_to_clusters .in_set(SimulationLightSystems::AssignLightsToClusters) .after(TransformSystem::TransformPropagate) .after(VisibilitySystems::CheckVisibility) diff --git a/crates/bevy_pbr/src/light/mod.rs b/crates/bevy_pbr/src/light/mod.rs index c0b8260245..aabae1b443 100644 --- a/crates/bevy_pbr/src/light/mod.rs +++ b/crates/bevy_pbr/src/light/mod.rs @@ -1,26 +1,19 @@ -use std::collections::HashSet; - use bevy_ecs::entity::EntityHashMap; use bevy_ecs::prelude::*; -use bevy_math::{ - AspectRatio, Mat4, UVec2, UVec3, Vec2, Vec3, Vec3A, Vec3Swizzles, Vec4, Vec4Swizzles, -}; +use bevy_math::{Mat4, Vec3A, Vec4}; use bevy_reflect::prelude::*; use bevy_render::{ camera::{Camera, CameraProjection}, extract_component::ExtractComponent, extract_resource::ExtractResource, mesh::Mesh, - primitives::{Aabb, CascadesFrusta, CubemapFrusta, Frustum, HalfSpace, Sphere}, - render_resource::BufferBindingType, - renderer::RenderDevice, + primitives::{Aabb, CascadesFrusta, CubemapFrusta, Frustum, Sphere}, view::{ InheritedVisibility, RenderLayers, ViewVisibility, VisibilityRange, VisibleEntities, VisibleEntityRanges, WithMesh, }, }; use bevy_transform::components::{GlobalTransform, Transform}; -use bevy_utils::tracing::warn; use crate::*; @@ -510,502 +503,6 @@ pub enum SimulationLightSystems { CheckLightVisibility, } -// Clustered-forward rendering notes -// The main initial reference material used was this rather accessible article: -// http://www.aortiz.me/2018/12/21/CG.html -// Some inspiration was taken from “Practical Clustered Shading” which is part 2 of: -// https://efficientshading.com/2015/01/01/real-time-many-light-management-and-shadows-with-clustered-shading/ -// (Also note that Part 3 of the above shows how we could support the shadow mapping for many lights.) 
-// The z-slicing method mentioned in the aortiz article is originally from Tiago Sousa's Siggraph 2016 talk about Doom 2016: -// http://advances.realtimerendering.com/s2016/Siggraph2016_idTech6.pdf - -/// Configure the far z-plane mode used for the furthest depth slice for clustered forward -/// rendering -#[derive(Debug, Copy, Clone, Reflect)] -pub enum ClusterFarZMode { - /// Calculate the required maximum z-depth based on currently visible lights. - /// Makes better use of available clusters, speeding up GPU lighting operations - /// at the expense of some CPU time and using more indices in the cluster light - /// index lists. - MaxLightRange, - /// Constant max z-depth - Constant(f32), -} - -/// Configure the depth-slicing strategy for clustered forward rendering -#[derive(Debug, Copy, Clone, Reflect)] -#[reflect(Default)] -pub struct ClusterZConfig { - /// Far `Z` plane of the first depth slice - pub first_slice_depth: f32, - /// Strategy for how to evaluate the far `Z` plane of the furthest depth slice - pub far_z_mode: ClusterFarZMode, -} - -impl Default for ClusterZConfig { - fn default() -> Self { - Self { - first_slice_depth: 5.0, - far_z_mode: ClusterFarZMode::MaxLightRange, - } - } -} - -/// Configuration of the clustering strategy for clustered forward rendering -#[derive(Debug, Copy, Clone, Component, Reflect)] -#[reflect(Component)] -pub enum ClusterConfig { - /// Disable light cluster calculations for this view - None, - /// One single cluster. Optimal for low-light complexity scenes or scenes where - /// most lights affect the entire scene. 
- Single, - /// Explicit `X`, `Y` and `Z` counts (may yield non-square `X/Y` clusters depending on the aspect ratio) - XYZ { - dimensions: UVec3, - z_config: ClusterZConfig, - /// Specify if clusters should automatically resize in `X/Y` if there is a risk of exceeding - /// the available cluster-light index limit - dynamic_resizing: bool, - }, - /// Fixed number of `Z` slices, `X` and `Y` calculated to give square clusters - /// with at most total clusters. For top-down games where lights will generally always be within a - /// short depth range, it may be useful to use this configuration with 1 or few `Z` slices. This - /// would reduce the number of lights per cluster by distributing more clusters in screen space - /// `X/Y` which matches how lights are distributed in the scene. - FixedZ { - total: u32, - z_slices: u32, - z_config: ClusterZConfig, - /// Specify if clusters should automatically resize in `X/Y` if there is a risk of exceeding - /// the available cluster-light index limit - dynamic_resizing: bool, - }, -} - -impl Default for ClusterConfig { - fn default() -> Self { - // 24 depth slices, square clusters with at most 4096 total clusters - // use max light distance as clusters max `Z`-depth, first slice extends to 5.0 - Self::FixedZ { - total: 4096, - z_slices: 24, - z_config: ClusterZConfig::default(), - dynamic_resizing: true, - } - } -} - -impl ClusterConfig { - fn dimensions_for_screen_size(&self, screen_size: UVec2) -> UVec3 { - match &self { - ClusterConfig::None => UVec3::ZERO, - ClusterConfig::Single => UVec3::ONE, - ClusterConfig::XYZ { dimensions, .. } => *dimensions, - ClusterConfig::FixedZ { - total, z_slices, .. 
- } => { - let aspect_ratio: f32 = - AspectRatio::from_pixels(screen_size.x, screen_size.y).into(); - let mut z_slices = *z_slices; - if *total < z_slices { - warn!("ClusterConfig has more z-slices than total clusters!"); - z_slices = *total; - } - let per_layer = *total as f32 / z_slices as f32; - - let y = f32::sqrt(per_layer / aspect_ratio); - - let mut x = (y * aspect_ratio) as u32; - let mut y = y as u32; - - // check extremes - if x == 0 { - x = 1; - y = per_layer as u32; - } - if y == 0 { - x = per_layer as u32; - y = 1; - } - - UVec3::new(x, y, z_slices) - } - } - } - - fn first_slice_depth(&self) -> f32 { - match self { - ClusterConfig::None | ClusterConfig::Single => 0.0, - ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => { - z_config.first_slice_depth - } - } - } - - fn far_z_mode(&self) -> ClusterFarZMode { - match self { - ClusterConfig::None => ClusterFarZMode::Constant(0.0), - ClusterConfig::Single => ClusterFarZMode::MaxLightRange, - ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => { - z_config.far_z_mode - } - } - } - - fn dynamic_resizing(&self) -> bool { - match self { - ClusterConfig::None | ClusterConfig::Single => false, - ClusterConfig::XYZ { - dynamic_resizing, .. - } - | ClusterConfig::FixedZ { - dynamic_resizing, .. - } => *dynamic_resizing, - } - } -} - -#[derive(Component, Debug, Default)] -pub struct Clusters { - /// Tile size - pub(crate) tile_size: UVec2, - /// Number of clusters in `X` / `Y` / `Z` in the view frustum - pub(crate) dimensions: UVec3, - /// Distance to the far plane of the first depth slice. The first depth slice is special - /// and explicitly-configured to avoid having unnecessarily many slices close to the camera. 
- pub(crate) near: f32, - pub(crate) far: f32, - pub(crate) lights: Vec, -} - -impl Clusters { - fn update(&mut self, screen_size: UVec2, requested_dimensions: UVec3) { - debug_assert!( - requested_dimensions.x > 0 && requested_dimensions.y > 0 && requested_dimensions.z > 0 - ); - - let tile_size = (screen_size.as_vec2() / requested_dimensions.xy().as_vec2()) - .ceil() - .as_uvec2() - .max(UVec2::ONE); - self.tile_size = tile_size; - self.dimensions = (screen_size.as_vec2() / tile_size.as_vec2()) - .ceil() - .as_uvec2() - .extend(requested_dimensions.z) - .max(UVec3::ONE); - - // NOTE: Maximum 4096 clusters due to uniform buffer size constraints - debug_assert!(self.dimensions.x * self.dimensions.y * self.dimensions.z <= 4096); - } - fn clear(&mut self) { - self.tile_size = UVec2::ONE; - self.dimensions = UVec3::ZERO; - self.near = 0.0; - self.far = 0.0; - self.lights.clear(); - } -} - -fn clip_to_view(inverse_projection: Mat4, clip: Vec4) -> Vec4 { - let view = inverse_projection * clip; - view / view.w -} - -pub fn add_clusters( - mut commands: Commands, - cameras: Query<(Entity, Option<&ClusterConfig>, &Camera), Without>, -) { - for (entity, config, camera) in &cameras { - if !camera.is_active { - continue; - } - - let config = config.copied().unwrap_or_default(); - // actual settings here don't matter - they will be overwritten in assign_lights_to_clusters - commands - .entity(entity) - .insert((Clusters::default(), config)); - } -} - -#[derive(Clone, Component, Debug, Default)] -pub struct VisiblePointLights { - pub(crate) entities: Vec, - pub point_light_count: usize, - pub spot_light_count: usize, -} - -impl VisiblePointLights { - #[inline] - pub fn iter(&self) -> impl DoubleEndedIterator { - self.entities.iter() - } - - #[inline] - pub fn len(&self) -> usize { - self.entities.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.entities.is_empty() - } -} - -// NOTE: Keep in sync with bevy_pbr/src/render/pbr.wgsl -fn view_z_to_z_slice( - 
cluster_factors: Vec2, - z_slices: u32, - view_z: f32, - is_orthographic: bool, -) -> u32 { - let z_slice = if is_orthographic { - // NOTE: view_z is correct in the orthographic case - ((view_z - cluster_factors.x) * cluster_factors.y).floor() as u32 - } else { - // NOTE: had to use -view_z to make it positive else log(negative) is nan - ((-view_z).ln() * cluster_factors.x - cluster_factors.y + 1.0) as u32 - }; - // NOTE: We use min as we may limit the far z plane used for clustering to be closer than - // the furthest thing being drawn. This means that we need to limit to the maximum cluster. - z_slice.min(z_slices - 1) -} - -// NOTE: Keep in sync as the inverse of view_z_to_z_slice above -fn z_slice_to_view_z( - near: f32, - far: f32, - z_slices: u32, - z_slice: u32, - is_orthographic: bool, -) -> f32 { - if is_orthographic { - return -near - (far - near) * z_slice as f32 / z_slices as f32; - } - - // Perspective - if z_slice == 0 { - 0.0 - } else { - -near * (far / near).powf((z_slice - 1) as f32 / (z_slices - 1) as f32) - } -} - -fn ndc_position_to_cluster( - cluster_dimensions: UVec3, - cluster_factors: Vec2, - is_orthographic: bool, - ndc_p: Vec3, - view_z: f32, -) -> UVec3 { - let cluster_dimensions_f32 = cluster_dimensions.as_vec3(); - let frag_coord = (ndc_p.xy() * VEC2_HALF_NEGATIVE_Y + VEC2_HALF).clamp(Vec2::ZERO, Vec2::ONE); - let xy = (frag_coord * cluster_dimensions_f32.xy()).floor(); - let z_slice = view_z_to_z_slice( - cluster_factors, - cluster_dimensions.z, - view_z, - is_orthographic, - ); - xy.as_uvec2() - .extend(z_slice) - .clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE) -} - -const VEC2_HALF: Vec2 = Vec2::splat(0.5); -const VEC2_HALF_NEGATIVE_Y: Vec2 = Vec2::new(0.5, -0.5); - -/// Calculate bounds for the light using a view space aabb. 
-/// Returns a `(Vec3, Vec3)` containing minimum and maximum with -/// `X` and `Y` in normalized device coordinates with range `[-1, 1]` -/// `Z` in view space, with range `[-inf, -f32::MIN_POSITIVE]` -fn cluster_space_light_aabb( - inverse_view_transform: Mat4, - view_inv_scale: Vec3, - projection_matrix: Mat4, - light_sphere: &Sphere, -) -> (Vec3, Vec3) { - let light_aabb_view = Aabb { - center: Vec3A::from(inverse_view_transform * light_sphere.center.extend(1.0)), - half_extents: Vec3A::from(light_sphere.radius * view_inv_scale.abs()), - }; - let (mut light_aabb_view_min, mut light_aabb_view_max) = - (light_aabb_view.min(), light_aabb_view.max()); - - // Constrain view z to be negative - i.e. in front of the camera - // When view z is >= 0.0 and we're using a perspective projection, bad things happen. - // At view z == 0.0, ndc x,y are mathematically undefined. At view z > 0.0, i.e. behind the camera, - // the perspective projection flips the directions of the axes. This breaks assumptions about - // use of min/max operations as something that was to the left in view space is now returning a - // coordinate that for view z in front of the camera would be on the right, but at view z behind the - // camera is on the left. So, we just constrain view z to be < 0.0 and necessarily in front of the camera. - light_aabb_view_min.z = light_aabb_view_min.z.min(-f32::MIN_POSITIVE); - light_aabb_view_max.z = light_aabb_view_max.z.min(-f32::MIN_POSITIVE); - - // Is there a cheaper way to do this? The problem is that because of perspective - // the point at max z but min xy may be less xy in screenspace, and similar. As - // such, projecting the min and max xy at both the closer and further z and taking - // the min and max of those projected points addresses this. 
- let ( - light_aabb_view_xymin_near, - light_aabb_view_xymin_far, - light_aabb_view_xymax_near, - light_aabb_view_xymax_far, - ) = ( - light_aabb_view_min, - light_aabb_view_min.xy().extend(light_aabb_view_max.z), - light_aabb_view_max.xy().extend(light_aabb_view_min.z), - light_aabb_view_max, - ); - let ( - light_aabb_clip_xymin_near, - light_aabb_clip_xymin_far, - light_aabb_clip_xymax_near, - light_aabb_clip_xymax_far, - ) = ( - projection_matrix * light_aabb_view_xymin_near.extend(1.0), - projection_matrix * light_aabb_view_xymin_far.extend(1.0), - projection_matrix * light_aabb_view_xymax_near.extend(1.0), - projection_matrix * light_aabb_view_xymax_far.extend(1.0), - ); - let ( - light_aabb_ndc_xymin_near, - light_aabb_ndc_xymin_far, - light_aabb_ndc_xymax_near, - light_aabb_ndc_xymax_far, - ) = ( - light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w, - light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w, - light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w, - light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w, - ); - let (light_aabb_ndc_min, light_aabb_ndc_max) = ( - light_aabb_ndc_xymin_near - .min(light_aabb_ndc_xymin_far) - .min(light_aabb_ndc_xymax_near) - .min(light_aabb_ndc_xymax_far), - light_aabb_ndc_xymin_near - .max(light_aabb_ndc_xymin_far) - .max(light_aabb_ndc_xymax_near) - .max(light_aabb_ndc_xymax_far), - ); - - // clamp to ndc coords without depth - let (aabb_min_ndc, aabb_max_ndc) = ( - light_aabb_ndc_min.xy().clamp(NDC_MIN, NDC_MAX), - light_aabb_ndc_max.xy().clamp(NDC_MIN, NDC_MAX), - ); - - // pack unadjusted z depth into the vecs - ( - aabb_min_ndc.extend(light_aabb_view_min.z), - aabb_max_ndc.extend(light_aabb_view_max.z), - ) -} - -fn screen_to_view(screen_size: Vec2, inverse_projection: Mat4, screen: Vec2, ndc_z: f32) -> Vec4 { - let tex_coord = screen / screen_size; - let clip = Vec4::new( - tex_coord.x * 2.0 - 1.0, - (1.0 - tex_coord.y) * 2.0 - 1.0, - ndc_z, - 1.0, - ); - 
clip_to_view(inverse_projection, clip) -} -const NDC_MIN: Vec2 = Vec2::NEG_ONE; -const NDC_MAX: Vec2 = Vec2::ONE; - -// Calculate the intersection of a ray from the eye through the view space position to a z plane -fn line_intersection_to_z_plane(origin: Vec3, p: Vec3, z: f32) -> Vec3 { - let v = p - origin; - let t = (z - Vec3::Z.dot(origin)) / Vec3::Z.dot(v); - origin + t * v -} - -#[allow(clippy::too_many_arguments)] -fn compute_aabb_for_cluster( - z_near: f32, - z_far: f32, - tile_size: Vec2, - screen_size: Vec2, - inverse_projection: Mat4, - is_orthographic: bool, - cluster_dimensions: UVec3, - ijk: UVec3, -) -> Aabb { - let ijk = ijk.as_vec3(); - - // Calculate the minimum and maximum points in screen space - let p_min = ijk.xy() * tile_size; - let p_max = p_min + tile_size; - - let cluster_min; - let cluster_max; - if is_orthographic { - // Use linear depth slicing for orthographic - - // Convert to view space at the cluster near and far planes - // NOTE: 1.0 is the near plane due to using reverse z projections - let mut p_min = screen_to_view(screen_size, inverse_projection, p_min, 0.0).xyz(); - let mut p_max = screen_to_view(screen_size, inverse_projection, p_max, 0.0).xyz(); - - // calculate cluster depth using z_near and z_far - p_min.z = -z_near + (z_near - z_far) * ijk.z / cluster_dimensions.z as f32; - p_max.z = -z_near + (z_near - z_far) * (ijk.z + 1.0) / cluster_dimensions.z as f32; - - cluster_min = p_min.min(p_max); - cluster_max = p_min.max(p_max); - } else { - // Convert to view space at the near plane - // NOTE: 1.0 is the near plane due to using reverse z projections - let p_min = screen_to_view(screen_size, inverse_projection, p_min, 1.0); - let p_max = screen_to_view(screen_size, inverse_projection, p_max, 1.0); - - let z_far_over_z_near = -z_far / -z_near; - let cluster_near = if ijk.z == 0.0 { - 0.0 - } else { - -z_near * z_far_over_z_near.powf((ijk.z - 1.0) / (cluster_dimensions.z - 1) as f32) - }; - // NOTE: This could be simplified to: 
- // cluster_far = cluster_near * z_far_over_z_near; - let cluster_far = if cluster_dimensions.z == 1 { - -z_far - } else { - -z_near * z_far_over_z_near.powf(ijk.z / (cluster_dimensions.z - 1) as f32) - }; - - // Calculate the four intersection points of the min and max points with the cluster near and far planes - let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near); - let p_min_far = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_far); - let p_max_near = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_near); - let p_max_far = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_far); - - cluster_min = p_min_near.min(p_min_far).min(p_max_near.min(p_max_far)); - cluster_max = p_min_near.max(p_min_far).max(p_max_near.max(p_max_far)); - } - - Aabb::from_min_max(cluster_min, cluster_max) -} - -// Sort lights by -// - point-light vs spot-light, so that we can iterate point lights and spot lights in contiguous blocks in the fragment shader, -// - then those with shadows enabled first, so that the index can be used to render at most `point_light_shadow_maps_count` -// point light shadows and `spot_light_shadow_maps_count` spot light shadow maps, -// - then by entity as a stable key to ensure that a consistent set of lights are chosen if the light count limit is exceeded. 
-pub(crate) fn point_light_order( - (entity_1, shadows_enabled_1, is_spot_light_1): (&Entity, &bool, &bool), - (entity_2, shadows_enabled_2, is_spot_light_2): (&Entity, &bool, &bool), -) -> std::cmp::Ordering { - is_spot_light_1 - .cmp(is_spot_light_2) // pointlights before spot lights - .then_with(|| shadows_enabled_2.cmp(shadows_enabled_1)) // shadow casters before non-casters - .then_with(|| entity_1.cmp(entity_2)) // stable -} - // Sort lights by // - those with volumetric (and shadows) enabled first, so that the volumetric // lighting pass can quickly find the volumetric lights; @@ -1024,703 +521,6 @@ pub(crate) fn directional_light_order( .then_with(|| entity_1.cmp(entity_2)) // stable } -#[derive(Clone)] -// data required for assigning lights to clusters -pub(crate) struct PointLightAssignmentData { - entity: Entity, - transform: GlobalTransform, - range: f32, - shadows_enabled: bool, - spot_light_angle: Option, - render_layers: RenderLayers, -} - -impl PointLightAssignmentData { - pub fn sphere(&self) -> Sphere { - Sphere { - center: self.transform.translation_vec3a(), - radius: self.range, - } - } -} - -#[derive(Resource, Default)] -pub struct GlobalVisiblePointLights { - entities: HashSet, -} - -impl GlobalVisiblePointLights { - #[inline] - pub fn iter(&self) -> impl Iterator { - self.entities.iter() - } - - #[inline] - pub fn contains(&self, entity: Entity) -> bool { - self.entities.contains(&entity) - } -} - -// NOTE: Run this before update_point_light_frusta! 
-#[allow(clippy::too_many_arguments)] -pub(crate) fn assign_lights_to_clusters( - mut commands: Commands, - mut global_lights: ResMut, - mut views: Query<( - Entity, - &GlobalTransform, - &Camera, - &Frustum, - &ClusterConfig, - &mut Clusters, - Option<&RenderLayers>, - Option<&mut VisiblePointLights>, - )>, - point_lights_query: Query<( - Entity, - &GlobalTransform, - &PointLight, - Option<&RenderLayers>, - &ViewVisibility, - )>, - spot_lights_query: Query<( - Entity, - &GlobalTransform, - &SpotLight, - Option<&RenderLayers>, - &ViewVisibility, - )>, - mut lights: Local>, - mut cluster_aabb_spheres: Local>>, - mut max_point_lights_warning_emitted: Local, - render_device: Option>, -) { - let Some(render_device) = render_device else { - return; - }; - - global_lights.entities.clear(); - lights.clear(); - // collect just the relevant light query data into a persisted vec to avoid reallocating each frame - lights.extend( - point_lights_query - .iter() - .filter(|(.., visibility)| visibility.get()) - .map( - |(entity, transform, point_light, maybe_layers, _visibility)| { - PointLightAssignmentData { - entity, - transform: GlobalTransform::from_translation(transform.translation()), - shadows_enabled: point_light.shadows_enabled, - range: point_light.range, - spot_light_angle: None, - render_layers: maybe_layers.unwrap_or_default().clone(), - } - }, - ), - ); - lights.extend( - spot_lights_query - .iter() - .filter(|(.., visibility)| visibility.get()) - .map( - |(entity, transform, spot_light, maybe_layers, _visibility)| { - PointLightAssignmentData { - entity, - transform: *transform, - shadows_enabled: spot_light.shadows_enabled, - range: spot_light.range, - spot_light_angle: Some(spot_light.outer_angle), - render_layers: maybe_layers.unwrap_or_default().clone(), - } - }, - ), - ); - - let clustered_forward_buffer_binding_type = - render_device.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT); - let supports_storage_buffers = matches!( - 
clustered_forward_buffer_binding_type, - BufferBindingType::Storage { .. } - ); - if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !supports_storage_buffers { - lights.sort_by(|light_1, light_2| { - point_light_order( - ( - &light_1.entity, - &light_1.shadows_enabled, - &light_1.spot_light_angle.is_some(), - ), - ( - &light_2.entity, - &light_2.shadows_enabled, - &light_2.spot_light_angle.is_some(), - ), - ) - }); - - // check each light against each view's frustum, keep only those that affect at least one of our views - let frusta: Vec<_> = views - .iter() - .map(|(_, _, _, frustum, _, _, _, _)| *frustum) - .collect(); - let mut lights_in_view_count = 0; - lights.retain(|light| { - // take one extra light to check if we should emit the warning - if lights_in_view_count == MAX_UNIFORM_BUFFER_POINT_LIGHTS + 1 { - false - } else { - let light_sphere = light.sphere(); - let light_in_view = frusta - .iter() - .any(|frustum| frustum.intersects_sphere(&light_sphere, true)); - - if light_in_view { - lights_in_view_count += 1; - } - - light_in_view - } - }); - - if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !*max_point_lights_warning_emitted { - warn!( - "MAX_UNIFORM_BUFFER_POINT_LIGHTS ({}) exceeded", - MAX_UNIFORM_BUFFER_POINT_LIGHTS - ); - *max_point_lights_warning_emitted = true; - } - - lights.truncate(MAX_UNIFORM_BUFFER_POINT_LIGHTS); - } - - for ( - view_entity, - camera_transform, - camera, - frustum, - config, - clusters, - maybe_layers, - mut visible_lights, - ) in &mut views - { - let view_layers = maybe_layers.unwrap_or_default(); - let clusters = clusters.into_inner(); - - if matches!(config, ClusterConfig::None) { - if visible_lights.is_some() { - commands.entity(view_entity).remove::(); - } - clusters.clear(); - continue; - } - - let Some(screen_size) = camera.physical_viewport_size() else { - clusters.clear(); - continue; - }; - - let mut requested_cluster_dimensions = config.dimensions_for_screen_size(screen_size); - - let view_transform = 
camera_transform.compute_matrix(); - let view_inv_scale = camera_transform.compute_transform().scale.recip(); - let view_inv_scale_max = view_inv_scale.abs().max_element(); - let inverse_view_transform = view_transform.inverse(); - let is_orthographic = camera.projection_matrix().w_axis.w == 1.0; - - let far_z = match config.far_z_mode() { - ClusterFarZMode::MaxLightRange => { - let inverse_view_row_2 = inverse_view_transform.row(2); - lights - .iter() - .map(|light| { - -inverse_view_row_2.dot(light.transform.translation().extend(1.0)) - + light.range * view_inv_scale.z - }) - .reduce(f32::max) - .unwrap_or(0.0) - } - ClusterFarZMode::Constant(far) => far, - }; - let first_slice_depth = match (is_orthographic, requested_cluster_dimensions.z) { - (true, _) => { - // NOTE: Based on glam's Mat4::orthographic_rh(), as used to calculate the orthographic projection - // matrix, we can calculate the projection's view-space near plane as follows: - // component 3,2 = r * near and 2,2 = r where r = 1.0 / (near - far) - // There is a caveat here that when calculating the projection matrix, near and far were swapped to give - // reversed z, consistent with the perspective projection. So, - // 3,2 = r * far and 2,2 = r where r = 1.0 / (far - near) - // rearranging r = 1.0 / (far - near), r * (far - near) = 1.0, r * far - 1.0 = r * near, near = (r * far - 1.0) / r - // = (3,2 - 1.0) / 2,2 - (camera.projection_matrix().w_axis.z - 1.0) / camera.projection_matrix().z_axis.z - } - (false, 1) => config.first_slice_depth().max(far_z), - _ => config.first_slice_depth(), - }; - let first_slice_depth = first_slice_depth * view_inv_scale.z; - - // NOTE: Ensure the far_z is at least as far as the first_depth_slice to avoid clustering problems. 
- let far_z = far_z.max(first_slice_depth); - let cluster_factors = calculate_cluster_factors( - first_slice_depth, - far_z, - requested_cluster_dimensions.z as f32, - is_orthographic, - ); - - if config.dynamic_resizing() { - let mut cluster_index_estimate = 0.0; - for light in &lights { - let light_sphere = light.sphere(); - - // Check if the light is within the view frustum - if !frustum.intersects_sphere(&light_sphere, true) { - continue; - } - - // calculate a conservative aabb estimate of number of clusters affected by this light - // this overestimates index counts by at most 50% (and typically much less) when the whole light range is in view - // it can overestimate more significantly when light ranges are only partially in view - let (light_aabb_min, light_aabb_max) = cluster_space_light_aabb( - inverse_view_transform, - view_inv_scale, - camera.projection_matrix(), - &light_sphere, - ); - - // since we won't adjust z slices we can calculate exact number of slices required in z dimension - let z_cluster_min = view_z_to_z_slice( - cluster_factors, - requested_cluster_dimensions.z, - light_aabb_min.z, - is_orthographic, - ); - let z_cluster_max = view_z_to_z_slice( - cluster_factors, - requested_cluster_dimensions.z, - light_aabb_max.z, - is_orthographic, - ); - let z_count = - z_cluster_min.max(z_cluster_max) - z_cluster_min.min(z_cluster_max) + 1; - - // calculate x/y count using floats to avoid overestimating counts due to large initial tile sizes - let xy_min = light_aabb_min.xy(); - let xy_max = light_aabb_max.xy(); - // multiply by 0.5 to move from [-1,1] to [-0.5, 0.5], max extent of 1 in each dimension - let xy_count = (xy_max - xy_min) - * 0.5 - * Vec2::new( - requested_cluster_dimensions.x as f32, - requested_cluster_dimensions.y as f32, - ); - - // add up to 2 to each axis to account for overlap - let x_overlap = if xy_min.x <= -1.0 { 0.0 } else { 1.0 } - + if xy_max.x >= 1.0 { 0.0 } else { 1.0 }; - let y_overlap = if xy_min.y <= -1.0 { 0.0 } else 
{ 1.0 } - + if xy_max.y >= 1.0 { 0.0 } else { 1.0 }; - cluster_index_estimate += - (xy_count.x + x_overlap) * (xy_count.y + y_overlap) * z_count as f32; - } - - if cluster_index_estimate > ViewClusterBindings::MAX_INDICES as f32 { - // scale x and y cluster count to be able to fit all our indices - - // we take the ratio of the actual indices over the index estimate. - // this is not guaranteed to be small enough due to overlapped tiles, but - // the conservative estimate is more than sufficient to cover the - // difference - let index_ratio = ViewClusterBindings::MAX_INDICES as f32 / cluster_index_estimate; - let xy_ratio = index_ratio.sqrt(); - - requested_cluster_dimensions.x = - ((requested_cluster_dimensions.x as f32 * xy_ratio).floor() as u32).max(1); - requested_cluster_dimensions.y = - ((requested_cluster_dimensions.y as f32 * xy_ratio).floor() as u32).max(1); - } - } - - clusters.update(screen_size, requested_cluster_dimensions); - clusters.near = first_slice_depth; - clusters.far = far_z; - - // NOTE: Maximum 4096 clusters due to uniform buffer size constraints - debug_assert!( - clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096 - ); - - let inverse_projection = camera.projection_matrix().inverse(); - - for lights in &mut clusters.lights { - lights.entities.clear(); - lights.point_light_count = 0; - lights.spot_light_count = 0; - } - let cluster_count = - (clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z) as usize; - clusters - .lights - .resize_with(cluster_count, VisiblePointLights::default); - - // initialize empty cluster bounding spheres - cluster_aabb_spheres.clear(); - cluster_aabb_spheres.extend(std::iter::repeat(None).take(cluster_count)); - - // Calculate the x/y/z cluster frustum planes in view space - let mut x_planes = Vec::with_capacity(clusters.dimensions.x as usize + 1); - let mut y_planes = Vec::with_capacity(clusters.dimensions.y as usize + 1); - let mut z_planes = 
Vec::with_capacity(clusters.dimensions.z as usize + 1); - - if is_orthographic { - let x_slices = clusters.dimensions.x as f32; - for x in 0..=clusters.dimensions.x { - let x_proportion = x as f32 / x_slices; - let x_pos = x_proportion * 2.0 - 1.0; - let view_x = clip_to_view(inverse_projection, Vec4::new(x_pos, 0.0, 1.0, 1.0)).x; - let normal = Vec3::X; - let d = view_x * normal.x; - x_planes.push(HalfSpace::new(normal.extend(d))); - } - - let y_slices = clusters.dimensions.y as f32; - for y in 0..=clusters.dimensions.y { - let y_proportion = 1.0 - y as f32 / y_slices; - let y_pos = y_proportion * 2.0 - 1.0; - let view_y = clip_to_view(inverse_projection, Vec4::new(0.0, y_pos, 1.0, 1.0)).y; - let normal = Vec3::Y; - let d = view_y * normal.y; - y_planes.push(HalfSpace::new(normal.extend(d))); - } - } else { - let x_slices = clusters.dimensions.x as f32; - for x in 0..=clusters.dimensions.x { - let x_proportion = x as f32 / x_slices; - let x_pos = x_proportion * 2.0 - 1.0; - let nb = clip_to_view(inverse_projection, Vec4::new(x_pos, -1.0, 1.0, 1.0)).xyz(); - let nt = clip_to_view(inverse_projection, Vec4::new(x_pos, 1.0, 1.0, 1.0)).xyz(); - let normal = nb.cross(nt); - let d = nb.dot(normal); - x_planes.push(HalfSpace::new(normal.extend(d))); - } - - let y_slices = clusters.dimensions.y as f32; - for y in 0..=clusters.dimensions.y { - let y_proportion = 1.0 - y as f32 / y_slices; - let y_pos = y_proportion * 2.0 - 1.0; - let nl = clip_to_view(inverse_projection, Vec4::new(-1.0, y_pos, 1.0, 1.0)).xyz(); - let nr = clip_to_view(inverse_projection, Vec4::new(1.0, y_pos, 1.0, 1.0)).xyz(); - let normal = nr.cross(nl); - let d = nr.dot(normal); - y_planes.push(HalfSpace::new(normal.extend(d))); - } - } - - let z_slices = clusters.dimensions.z; - for z in 0..=z_slices { - let view_z = z_slice_to_view_z(first_slice_depth, far_z, z_slices, z, is_orthographic); - let normal = -Vec3::Z; - let d = view_z * normal.z; - z_planes.push(HalfSpace::new(normal.extend(d))); - } - - 
let mut update_from_light_intersections = |visible_lights: &mut Vec| { - for light in &lights { - // check if the light layers overlap the view layers - if !view_layers.intersects(&light.render_layers) { - continue; - } - - let light_sphere = light.sphere(); - - // Check if the light is within the view frustum - if !frustum.intersects_sphere(&light_sphere, true) { - continue; - } - - // NOTE: The light intersects the frustum so it must be visible and part of the global set - global_lights.entities.insert(light.entity); - visible_lights.push(light.entity); - - // note: caching seems to be slower than calling twice for this aabb calculation - let (light_aabb_xy_ndc_z_view_min, light_aabb_xy_ndc_z_view_max) = - cluster_space_light_aabb( - inverse_view_transform, - view_inv_scale, - camera.projection_matrix(), - &light_sphere, - ); - - let min_cluster = ndc_position_to_cluster( - clusters.dimensions, - cluster_factors, - is_orthographic, - light_aabb_xy_ndc_z_view_min, - light_aabb_xy_ndc_z_view_min.z, - ); - let max_cluster = ndc_position_to_cluster( - clusters.dimensions, - cluster_factors, - is_orthographic, - light_aabb_xy_ndc_z_view_max, - light_aabb_xy_ndc_z_view_max.z, - ); - let (min_cluster, max_cluster) = - (min_cluster.min(max_cluster), min_cluster.max(max_cluster)); - - // What follows is the Iterative Sphere Refinement algorithm from Just Cause 3 - // Persson et al, Practical Clustered Shading - // http://newq.net/dl/pub/s2015_practical.pdf - // NOTE: A sphere under perspective projection is no longer a sphere. It gets - // stretched and warped, which prevents simpler algorithms from being correct - // as they often assume that the widest part of the sphere under projection is the - // center point on the axis of interest plus the radius, and that is not true! 
- let view_light_sphere = Sphere { - center: Vec3A::from(inverse_view_transform * light_sphere.center.extend(1.0)), - radius: light_sphere.radius * view_inv_scale_max, - }; - let spot_light_dir_sin_cos = light.spot_light_angle.map(|angle| { - let (angle_sin, angle_cos) = angle.sin_cos(); - ( - (inverse_view_transform * light.transform.back().extend(0.0)) - .truncate() - .normalize(), - angle_sin, - angle_cos, - ) - }); - let light_center_clip = - camera.projection_matrix() * view_light_sphere.center.extend(1.0); - let light_center_ndc = light_center_clip.xyz() / light_center_clip.w; - let cluster_coordinates = ndc_position_to_cluster( - clusters.dimensions, - cluster_factors, - is_orthographic, - light_center_ndc, - view_light_sphere.center.z, - ); - let z_center = if light_center_ndc.z <= 1.0 { - Some(cluster_coordinates.z) - } else { - None - }; - let y_center = if light_center_ndc.y > 1.0 { - None - } else if light_center_ndc.y < -1.0 { - Some(clusters.dimensions.y + 1) - } else { - Some(cluster_coordinates.y) - }; - for z in min_cluster.z..=max_cluster.z { - let mut z_light = view_light_sphere.clone(); - if z_center.is_none() || z != z_center.unwrap() { - // The z plane closer to the light has the larger radius circle where the - // light sphere intersects the z plane. - let z_plane = if z_center.is_some() && z < z_center.unwrap() { - z_planes[(z + 1) as usize] - } else { - z_planes[z as usize] - }; - // Project the sphere to this z plane and use its radius as the radius of a - // new, refined sphere. - if let Some(projected) = project_to_plane_z(z_light, z_plane) { - z_light = projected; - } else { - continue; - } - } - for y in min_cluster.y..=max_cluster.y { - let mut y_light = z_light.clone(); - if y_center.is_none() || y != y_center.unwrap() { - // The y plane closer to the light has the larger radius circle where the - // light sphere intersects the y plane. 
- let y_plane = if y_center.is_some() && y < y_center.unwrap() { - y_planes[(y + 1) as usize] - } else { - y_planes[y as usize] - }; - // Project the refined sphere to this y plane and use its radius as the - // radius of a new, even more refined sphere. - if let Some(projected) = - project_to_plane_y(y_light, y_plane, is_orthographic) - { - y_light = projected; - } else { - continue; - } - } - // Loop from the left to find the first affected cluster - let mut min_x = min_cluster.x; - loop { - if min_x >= max_cluster.x - || -get_distance_x( - x_planes[(min_x + 1) as usize], - y_light.center, - is_orthographic, - ) + y_light.radius - > 0.0 - { - break; - } - min_x += 1; - } - // Loop from the right to find the last affected cluster - let mut max_x = max_cluster.x; - loop { - if max_x <= min_x - || get_distance_x( - x_planes[max_x as usize], - y_light.center, - is_orthographic, - ) + y_light.radius - > 0.0 - { - break; - } - max_x -= 1; - } - let mut cluster_index = ((y * clusters.dimensions.x + min_x) - * clusters.dimensions.z - + z) as usize; - - if let Some((view_light_direction, angle_sin, angle_cos)) = - spot_light_dir_sin_cos - { - for x in min_x..=max_x { - // further culling for spot lights - // get or initialize cluster bounding sphere - let cluster_aabb_sphere = &mut cluster_aabb_spheres[cluster_index]; - let cluster_aabb_sphere = if let Some(sphere) = cluster_aabb_sphere - { - &*sphere - } else { - let aabb = compute_aabb_for_cluster( - first_slice_depth, - far_z, - clusters.tile_size.as_vec2(), - screen_size.as_vec2(), - inverse_projection, - is_orthographic, - clusters.dimensions, - UVec3::new(x, y, z), - ); - let sphere = Sphere { - center: aabb.center, - radius: aabb.half_extents.length(), - }; - *cluster_aabb_sphere = Some(sphere); - cluster_aabb_sphere.as_ref().unwrap() - }; - - // test -- based on https://bartwronski.com/2017/04/13/cull-that-cone/ - let spot_light_offset = Vec3::from( - view_light_sphere.center - cluster_aabb_sphere.center, - ); - 
let spot_light_dist_sq = spot_light_offset.length_squared(); - let v1_len = spot_light_offset.dot(view_light_direction); - - let distance_closest_point = (angle_cos - * (spot_light_dist_sq - v1_len * v1_len).sqrt()) - - v1_len * angle_sin; - let angle_cull = - distance_closest_point > cluster_aabb_sphere.radius; - - let front_cull = v1_len - > cluster_aabb_sphere.radius + light.range * view_inv_scale_max; - let back_cull = v1_len < -cluster_aabb_sphere.radius; - - if !angle_cull && !front_cull && !back_cull { - // this cluster is affected by the spot light - clusters.lights[cluster_index].entities.push(light.entity); - clusters.lights[cluster_index].spot_light_count += 1; - } - cluster_index += clusters.dimensions.z as usize; - } - } else { - for _ in min_x..=max_x { - // all clusters within range are affected by point lights - clusters.lights[cluster_index].entities.push(light.entity); - clusters.lights[cluster_index].point_light_count += 1; - cluster_index += clusters.dimensions.z as usize; - } - } - } - } - } - }; - - // reuse existing visible lights Vec, if it exists - if let Some(visible_lights) = visible_lights.as_mut() { - visible_lights.entities.clear(); - update_from_light_intersections(&mut visible_lights.entities); - } else { - let mut entities = Vec::new(); - update_from_light_intersections(&mut entities); - commands.entity(view_entity).insert(VisiblePointLights { - entities, - ..Default::default() - }); - } - } -} - -// NOTE: This exploits the fact that a x-plane normal has only x and z components -fn get_distance_x(plane: HalfSpace, point: Vec3A, is_orthographic: bool) -> f32 { - if is_orthographic { - point.x - plane.d() - } else { - // Distance from a point to a plane: - // signed distance to plane = (nx * px + ny * py + nz * pz + d) / n.length() - // NOTE: For a x-plane, ny and d are 0 and we have a unit normal - // = nx * px + nz * pz - plane.normal_d().xz().dot(point.xz()) - } -} - -// NOTE: This exploits the fact that a z-plane normal has only a 
z component -fn project_to_plane_z(z_light: Sphere, z_plane: HalfSpace) -> Option { - // p = sphere center - // n = plane normal - // d = n.p if p is in the plane - // NOTE: For a z-plane, nx and ny are both 0 - // d = px * nx + py * ny + pz * nz - // = pz * nz - // => pz = d / nz - let z = z_plane.d() / z_plane.normal_d().z; - let distance_to_plane = z - z_light.center.z; - if distance_to_plane.abs() > z_light.radius { - return None; - } - Some(Sphere { - center: Vec3A::from(z_light.center.xy().extend(z)), - // hypotenuse length = radius - // pythagoras = (distance to plane)^2 + b^2 = radius^2 - radius: (z_light.radius * z_light.radius - distance_to_plane * distance_to_plane).sqrt(), - }) -} - -// NOTE: This exploits the fact that a y-plane normal has only y and z components -fn project_to_plane_y( - y_light: Sphere, - y_plane: HalfSpace, - is_orthographic: bool, -) -> Option { - let distance_to_plane = if is_orthographic { - y_plane.d() - y_light.center.y - } else { - -y_light.center.yz().dot(y_plane.normal_d().yz()) - }; - - if distance_to_plane.abs() > y_light.radius { - return None; - } - Some(Sphere { - center: y_light.center + distance_to_plane * y_plane.normal(), - radius: (y_light.radius * y_light.radius - distance_to_plane * distance_to_plane).sqrt(), - }) -} - pub fn update_directional_light_frusta( mut views: Query< ( @@ -2154,65 +954,3 @@ pub fn check_light_mesh_visibility( } } } - -#[cfg(test)] -mod test { - use super::*; - - fn test_cluster_tiling(config: ClusterConfig, screen_size: UVec2) -> Clusters { - let dims = config.dimensions_for_screen_size(screen_size); - - // note: near & far do not affect tiling - let mut clusters = Clusters::default(); - clusters.update(screen_size, dims); - - // check we cover the screen - assert!(clusters.tile_size.x * clusters.dimensions.x >= screen_size.x); - assert!(clusters.tile_size.y * clusters.dimensions.y >= screen_size.y); - // check a smaller number of clusters would not cover the screen - 
assert!(clusters.tile_size.x * (clusters.dimensions.x - 1) < screen_size.x); - assert!(clusters.tile_size.y * (clusters.dimensions.y - 1) < screen_size.y); - // check a smaller tile size would not cover the screen - assert!((clusters.tile_size.x - 1) * clusters.dimensions.x < screen_size.x); - assert!((clusters.tile_size.y - 1) * clusters.dimensions.y < screen_size.y); - // check we don't have more clusters than pixels - assert!(clusters.dimensions.x <= screen_size.x); - assert!(clusters.dimensions.y <= screen_size.y); - - clusters - } - - #[test] - // check tiling for small screen sizes - fn test_default_cluster_setup_small_screensizes() { - for x in 1..100 { - for y in 1..100 { - let screen_size = UVec2::new(x, y); - let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size); - assert!( - clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096 - ); - } - } - } - - #[test] - // check tiling for long thin screen sizes - fn test_default_cluster_setup_small_x() { - for x in 1..10 { - for y in 1..5000 { - let screen_size = UVec2::new(x, y); - let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size); - assert!( - clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096 - ); - - let screen_size = UVec2::new(y, x); - let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size); - assert!( - clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096 - ); - } - } - } -} diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index b425a251aa..56107f8117 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -3,10 +3,9 @@ use bevy_core_pipeline::core_3d::CORE_3D_DEPTH_FORMAT; use bevy_ecs::entity::EntityHashSet; use bevy_ecs::prelude::*; use bevy_ecs::{entity::EntityHashMap, system::lifetimeless::Read}; -use bevy_math::{Mat4, UVec3, UVec4, Vec2, Vec3, Vec3Swizzles, Vec4, Vec4Swizzles}; +use 
bevy_math::{Mat4, UVec4, Vec2, Vec3, Vec3Swizzles, Vec4, Vec4Swizzles}; use bevy_render::mesh::Mesh; use bevy_render::{ - camera::Camera, diagnostic::RecordDiagnostics, mesh::GpuMesh, primitives::{CascadesFrusta, CubemapFrusta, Frustum, HalfSpace}, @@ -23,7 +22,7 @@ use bevy_transform::{components::GlobalTransform, prelude::Transform}; #[cfg(feature = "trace")] use bevy_utils::tracing::info_span; use bevy_utils::tracing::{error, warn}; -use std::{hash::Hash, num::NonZeroU64, ops::Range}; +use std::{hash::Hash, ops::Range}; use crate::*; @@ -56,96 +55,6 @@ pub struct ExtractedDirectionalLight { pub render_layers: RenderLayers, } -#[derive(Copy, Clone, ShaderType, Default, Debug)] -pub struct GpuPointLight { - // For point lights: the lower-right 2x2 values of the projection matrix [2][2] [2][3] [3][2] [3][3] - // For spot lights: 2 components of the direction (x,z), spot_scale and spot_offset - light_custom_data: Vec4, - color_inverse_square_range: Vec4, - position_radius: Vec4, - flags: u32, - shadow_depth_bias: f32, - shadow_normal_bias: f32, - spot_light_tan_angle: f32, -} - -#[derive(ShaderType)] -pub struct GpuPointLightsUniform { - data: Box<[GpuPointLight; MAX_UNIFORM_BUFFER_POINT_LIGHTS]>, -} - -impl Default for GpuPointLightsUniform { - fn default() -> Self { - Self { - data: Box::new([GpuPointLight::default(); MAX_UNIFORM_BUFFER_POINT_LIGHTS]), - } - } -} - -#[derive(ShaderType, Default)] -pub struct GpuPointLightsStorage { - #[size(runtime)] - data: Vec, -} - -pub enum GpuPointLights { - Uniform(UniformBuffer), - Storage(StorageBuffer), -} - -impl GpuPointLights { - fn new(buffer_binding_type: BufferBindingType) -> Self { - match buffer_binding_type { - BufferBindingType::Storage { .. 
} => Self::storage(), - BufferBindingType::Uniform => Self::uniform(), - } - } - - fn uniform() -> Self { - Self::Uniform(UniformBuffer::default()) - } - - fn storage() -> Self { - Self::Storage(StorageBuffer::default()) - } - - fn set(&mut self, mut lights: Vec) { - match self { - GpuPointLights::Uniform(buffer) => { - let len = lights.len().min(MAX_UNIFORM_BUFFER_POINT_LIGHTS); - let src = &lights[..len]; - let dst = &mut buffer.get_mut().data[..len]; - dst.copy_from_slice(src); - } - GpuPointLights::Storage(buffer) => { - buffer.get_mut().data.clear(); - buffer.get_mut().data.append(&mut lights); - } - } - } - - fn write_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { - match self { - GpuPointLights::Uniform(buffer) => buffer.write_buffer(render_device, render_queue), - GpuPointLights::Storage(buffer) => buffer.write_buffer(render_device, render_queue), - } - } - - pub fn binding(&self) -> Option { - match self { - GpuPointLights::Uniform(buffer) => buffer.binding(), - GpuPointLights::Storage(buffer) => buffer.binding(), - } - } - - pub fn min_size(buffer_binding_type: BufferBindingType) -> NonZeroU64 { - match buffer_binding_type { - BufferBindingType::Storage { .. } => GpuPointLightsStorage::min_size(), - BufferBindingType::Uniform => GpuPointLightsUniform::min_size(), - } - } -} - // NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_view_types.wgsl! bitflags::bitflags! { #[repr(transparent)] @@ -204,9 +113,6 @@ pub struct GpuLights { spot_light_shadowmap_offset: i32, } -// NOTE: this must be kept in sync with the same constants in pbr.frag -pub const MAX_UNIFORM_BUFFER_POINT_LIGHTS: usize = 256; - //NOTE: When running bevy on Adreno GPU chipsets in WebGL, any value above 1 will result in a crash // when loading the wgsl "pbr_functions.wgsl" in the function apply_fog. 
#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))] @@ -262,57 +168,6 @@ impl FromWorld for ShadowSamplers { } } -#[derive(Component)] -pub struct ExtractedClusterConfig { - /// Special near value for cluster calculations - near: f32, - far: f32, - /// Number of clusters in `X` / `Y` / `Z` in the view frustum - dimensions: UVec3, -} - -enum ExtractedClustersPointLightsElement { - ClusterHeader(u32, u32), - LightEntity(Entity), -} - -#[derive(Component)] -pub struct ExtractedClustersPointLights { - data: Vec, -} - -pub fn extract_clusters( - mut commands: Commands, - views: Extract>, -) { - for (entity, clusters, camera) in &views { - if !camera.is_active { - continue; - } - - let num_entities: usize = clusters.lights.iter().map(|l| l.entities.len()).sum(); - let mut data = Vec::with_capacity(clusters.lights.len() + num_entities); - for cluster_lights in &clusters.lights { - data.push(ExtractedClustersPointLightsElement::ClusterHeader( - cluster_lights.point_light_count as u32, - cluster_lights.spot_light_count as u32, - )); - for l in &cluster_lights.entities { - data.push(ExtractedClustersPointLightsElement::LightEntity(*l)); - } - } - - commands.get_or_spawn(entity).insert(( - ExtractedClustersPointLights { data }, - ExtractedClusterConfig { - near: clusters.near, - far: clusters.far, - dimensions: clusters.dimensions, - }, - )); - } -} - #[allow(clippy::too_many_arguments)] pub fn extract_lights( mut commands: Commands, @@ -584,35 +439,6 @@ pub struct ViewLightsUniformOffset { pub offset: u32, } -// NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that -// at least that many are supported using this constant and SupportedBindingType::from_device() -pub const CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT: u32 = 3; - -#[derive(Resource)] -pub struct GlobalLightMeta { - pub gpu_point_lights: GpuPointLights, - pub entity_to_index: EntityHashMap, -} - -impl FromWorld for GlobalLightMeta { - fn from_world(world: &mut 
World) -> Self { - Self::new( - world - .resource::() - .get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT), - ) - } -} - -impl GlobalLightMeta { - pub fn new(buffer_binding_type: BufferBindingType) -> Self { - Self { - gpu_point_lights: GpuPointLights::new(buffer_binding_type), - entity_to_index: EntityHashMap::default(), - } - } -} - #[derive(Resource, Default)] pub struct LightMeta { pub view_gpu_lights: DynamicUniformBuffer, @@ -808,7 +634,7 @@ pub fn prepare_lights( // point light shadows and `spot_light_shadow_maps_count` spot light shadow maps, // - then by entity as a stable key to ensure that a consistent set of lights are chosen if the light count limit is exceeded. point_lights.sort_by(|(entity_1, light_1, _), (entity_2, light_2, _)| { - point_light_order( + crate::cluster::point_light_order( ( entity_1, &light_1.shadows_enabled, @@ -1327,327 +1153,6 @@ pub fn prepare_lights( shadow_render_phases.retain(|entity, _| live_shadow_mapping_lights.contains(entity)); } -// this must match CLUSTER_COUNT_SIZE in pbr.wgsl -// and must be large enough to contain MAX_UNIFORM_BUFFER_POINT_LIGHTS -const CLUSTER_COUNT_SIZE: u32 = 9; - -const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - (CLUSTER_COUNT_SIZE * 2))) - 1; -const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1; - -// NOTE: With uniform buffer max binding size as 16384 bytes -// that means we can fit 256 point lights in one uniform -// buffer, which means the count can be at most 256 so it -// needs 9 bits. -// The array of indices can also use u8 and that means the -// offset in to the array of indices needs to be able to address -// 16384 values. log2(16384) = 14 bits. -// We use 32 bits to store the offset and counts so -// we pack the offset into the upper 14 bits of a u32, -// the point light count into bits 9-17, and the spot light count into bits 0-8. -// [ 31 .. 18 | 17 .. 9 | 8 .. 
0 ] -// [ offset | point light count | spot light count ] -// NOTE: This assumes CPU and GPU endianness are the same which is true -// for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs -fn pack_offset_and_counts(offset: usize, point_count: usize, spot_count: usize) -> u32 { - ((offset as u32 & CLUSTER_OFFSET_MASK) << (CLUSTER_COUNT_SIZE * 2)) - | (point_count as u32 & CLUSTER_COUNT_MASK) << CLUSTER_COUNT_SIZE - | (spot_count as u32 & CLUSTER_COUNT_MASK) -} - -#[derive(ShaderType)] -struct GpuClusterLightIndexListsUniform { - data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, -} - -// NOTE: Assert at compile time that GpuClusterLightIndexListsUniform -// fits within the maximum uniform buffer binding size -const _: () = assert!(GpuClusterLightIndexListsUniform::SHADER_SIZE.get() <= 16384); - -impl Default for GpuClusterLightIndexListsUniform { - fn default() -> Self { - Self { - data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]), - } - } -} - -#[derive(ShaderType)] -struct GpuClusterOffsetsAndCountsUniform { - data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, -} - -impl Default for GpuClusterOffsetsAndCountsUniform { - fn default() -> Self { - Self { - data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]), - } - } -} - -#[derive(ShaderType, Default)] -struct GpuClusterLightIndexListsStorage { - #[size(runtime)] - data: Vec, -} - -#[derive(ShaderType, Default)] -struct GpuClusterOffsetsAndCountsStorage { - #[size(runtime)] - data: Vec, -} - -enum ViewClusterBuffers { - Uniform { - // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment - cluster_light_index_lists: UniformBuffer, - // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment - cluster_offsets_and_counts: UniformBuffer, - }, - Storage { - cluster_light_index_lists: StorageBuffer, - cluster_offsets_and_counts: StorageBuffer, - }, -} - -impl ViewClusterBuffers { - fn 
new(buffer_binding_type: BufferBindingType) -> Self { - match buffer_binding_type { - BufferBindingType::Storage { .. } => Self::storage(), - BufferBindingType::Uniform => Self::uniform(), - } - } - - fn uniform() -> Self { - ViewClusterBuffers::Uniform { - cluster_light_index_lists: UniformBuffer::default(), - cluster_offsets_and_counts: UniformBuffer::default(), - } - } - - fn storage() -> Self { - ViewClusterBuffers::Storage { - cluster_light_index_lists: StorageBuffer::default(), - cluster_offsets_and_counts: StorageBuffer::default(), - } - } -} - -#[derive(Component)] -pub struct ViewClusterBindings { - n_indices: usize, - n_offsets: usize, - buffers: ViewClusterBuffers, -} - -impl ViewClusterBindings { - pub const MAX_OFFSETS: usize = 16384 / 4; - const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4; - pub const MAX_INDICES: usize = 16384; - - pub fn new(buffer_binding_type: BufferBindingType) -> Self { - Self { - n_indices: 0, - n_offsets: 0, - buffers: ViewClusterBuffers::new(buffer_binding_type), - } - } - - pub fn clear(&mut self) { - match &mut self.buffers { - ViewClusterBuffers::Uniform { - cluster_light_index_lists, - cluster_offsets_and_counts, - } => { - *cluster_light_index_lists.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]; - *cluster_offsets_and_counts.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]; - } - ViewClusterBuffers::Storage { - cluster_light_index_lists, - cluster_offsets_and_counts, - .. - } => { - cluster_light_index_lists.get_mut().data.clear(); - cluster_offsets_and_counts.get_mut().data.clear(); - } - } - } - - pub fn push_offset_and_counts(&mut self, offset: usize, point_count: usize, spot_count: usize) { - match &mut self.buffers { - ViewClusterBuffers::Uniform { - cluster_offsets_and_counts, - .. 
- } => { - let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4 - if array_index >= Self::MAX_UNIFORM_ITEMS { - warn!("cluster offset and count out of bounds!"); - return; - } - let component = self.n_offsets & ((1 << 2) - 1); - let packed = pack_offset_and_counts(offset, point_count, spot_count); - - cluster_offsets_and_counts.get_mut().data[array_index][component] = packed; - } - ViewClusterBuffers::Storage { - cluster_offsets_and_counts, - .. - } => { - cluster_offsets_and_counts.get_mut().data.push(UVec4::new( - offset as u32, - point_count as u32, - spot_count as u32, - 0, - )); - } - } - - self.n_offsets += 1; - } - - pub fn n_indices(&self) -> usize { - self.n_indices - } - - pub fn push_index(&mut self, index: usize) { - match &mut self.buffers { - ViewClusterBuffers::Uniform { - cluster_light_index_lists, - .. - } => { - let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16 - let component = (self.n_indices >> 2) & ((1 << 2) - 1); - let sub_index = self.n_indices & ((1 << 2) - 1); - let index = index as u32; - - cluster_light_index_lists.get_mut().data[array_index][component] |= - index << (8 * sub_index); - } - ViewClusterBuffers::Storage { - cluster_light_index_lists, - .. 
- } => { - cluster_light_index_lists.get_mut().data.push(index as u32); - } - } - - self.n_indices += 1; - } - - pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { - match &mut self.buffers { - ViewClusterBuffers::Uniform { - cluster_light_index_lists, - cluster_offsets_and_counts, - } => { - cluster_light_index_lists.write_buffer(render_device, render_queue); - cluster_offsets_and_counts.write_buffer(render_device, render_queue); - } - ViewClusterBuffers::Storage { - cluster_light_index_lists, - cluster_offsets_and_counts, - } => { - cluster_light_index_lists.write_buffer(render_device, render_queue); - cluster_offsets_and_counts.write_buffer(render_device, render_queue); - } - } - } - - pub fn light_index_lists_binding(&self) -> Option { - match &self.buffers { - ViewClusterBuffers::Uniform { - cluster_light_index_lists, - .. - } => cluster_light_index_lists.binding(), - ViewClusterBuffers::Storage { - cluster_light_index_lists, - .. - } => cluster_light_index_lists.binding(), - } - } - - pub fn offsets_and_counts_binding(&self) -> Option { - match &self.buffers { - ViewClusterBuffers::Uniform { - cluster_offsets_and_counts, - .. - } => cluster_offsets_and_counts.binding(), - ViewClusterBuffers::Storage { - cluster_offsets_and_counts, - .. - } => cluster_offsets_and_counts.binding(), - } - } - - pub fn min_size_cluster_light_index_lists( - buffer_binding_type: BufferBindingType, - ) -> NonZeroU64 { - match buffer_binding_type { - BufferBindingType::Storage { .. } => GpuClusterLightIndexListsStorage::min_size(), - BufferBindingType::Uniform => GpuClusterLightIndexListsUniform::min_size(), - } - } - - pub fn min_size_cluster_offsets_and_counts( - buffer_binding_type: BufferBindingType, - ) -> NonZeroU64 { - match buffer_binding_type { - BufferBindingType::Storage { .. 
} => GpuClusterOffsetsAndCountsStorage::min_size(), - BufferBindingType::Uniform => GpuClusterOffsetsAndCountsUniform::min_size(), - } - } -} - -pub fn prepare_clusters( - mut commands: Commands, - render_device: Res, - render_queue: Res, - mesh_pipeline: Res, - global_light_meta: Res, - views: Query<(Entity, &ExtractedClustersPointLights)>, -) { - let render_device = render_device.into_inner(); - let supports_storage_buffers = matches!( - mesh_pipeline.clustered_forward_buffer_binding_type, - BufferBindingType::Storage { .. } - ); - for (entity, extracted_clusters) in &views { - let mut view_clusters_bindings = - ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type); - view_clusters_bindings.clear(); - - for record in &extracted_clusters.data { - match record { - ExtractedClustersPointLightsElement::ClusterHeader( - point_light_count, - spot_light_count, - ) => { - let offset = view_clusters_bindings.n_indices(); - view_clusters_bindings.push_offset_and_counts( - offset, - *point_light_count as usize, - *spot_light_count as usize, - ); - } - ExtractedClustersPointLightsElement::LightEntity(entity) => { - if let Some(light_index) = global_light_meta.entity_to_index.get(entity) { - if view_clusters_bindings.n_indices() >= ViewClusterBindings::MAX_INDICES - && !supports_storage_buffers - { - warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters."); - break; - } - view_clusters_bindings.push_index(*light_index); - } - } - } - } - - view_clusters_bindings.write_buffers(render_device, &render_queue); - - commands.get_or_spawn(entity).insert(view_clusters_bindings); - } -} - /// For each shadow cascade, iterates over all the meshes "visible" from it and /// adds them to [`BinnedRenderPhase`]s or [`SortedRenderPhase`]s as /// appropriate.