Move clustering-related types and functions into their own module. (#13640)

As a prerequisite for decals and clustering of light probes, we want
clustering to operate on objects other than lights. (Currently, it only
operates on point and spot lights.) This necessitates a large
refactoring, so I'm splitting it up into small steps.

The first such step is to separate clustering from lighting by moving
clustering-related types and functions out of lighting and into their
own module subtree within the `bevy_pbr` crate. (Ultimately, we may want
to move it to `bevy_render`, but that requires more work and can be a
followup.)

No code changes have been made other than adjusting import lists and
moving code. This is to make this code easy to review. Ultimately, I
want to rename "light" to "clusterable object" in most cases, but doing
that at the same time as moving the code would make reviewing harder. So
instead I'm moving the code first and will follow this up with renaming.

## Migration Guide

* Clustering-related types and functions (e.g.
`assign_lights_to_clusters`) have moved under `bevy_pbr::cluster`, in
preparation for the ability to cluster objects other than lights.
This commit is contained in:
Patrick Walton 2024-06-03 08:05:48 -07:00 committed by GitHub
parent bb51635481
commit 5c74c17c24
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 1810 additions and 1763 deletions

View file

@ -0,0 +1,945 @@
//! Assigning objects to clusters.
use bevy_ecs::{
entity::Entity,
system::{Commands, Local, Query, Res, ResMut},
};
use bevy_math::{Mat4, UVec3, Vec2, Vec3, Vec3A, Vec3Swizzles as _, Vec4, Vec4Swizzles as _};
use bevy_render::{
camera::Camera,
primitives::{Aabb, Frustum, HalfSpace, Sphere},
render_resource::BufferBindingType,
renderer::RenderDevice,
view::{RenderLayers, ViewVisibility},
};
use bevy_transform::components::GlobalTransform;
use bevy_utils::tracing::warn;
use crate::{
ClusterConfig, ClusterFarZMode, Clusters, GlobalVisiblePointLights, PointLight, SpotLight,
ViewClusterBindings, VisiblePointLights, CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT,
MAX_UNIFORM_BUFFER_POINT_LIGHTS,
};
const NDC_MIN: Vec2 = Vec2::NEG_ONE;
const NDC_MAX: Vec2 = Vec2::ONE;
const VEC2_HALF: Vec2 = Vec2::splat(0.5);
const VEC2_HALF_NEGATIVE_Y: Vec2 = Vec2::new(0.5, -0.5);
#[derive(Clone)]
// data required for assigning lights to clusters
pub(crate) struct PointLightAssignmentData {
entity: Entity,
transform: GlobalTransform,
range: f32,
shadows_enabled: bool,
spot_light_angle: Option<f32>,
render_layers: RenderLayers,
}
impl PointLightAssignmentData {
pub fn sphere(&self) -> Sphere {
Sphere {
center: self.transform.translation_vec3a(),
radius: self.range,
}
}
}
// NOTE: Run this before update_point_light_frusta!
#[allow(clippy::too_many_arguments)]
pub(crate) fn assign_lights_to_clusters(
mut commands: Commands,
mut global_lights: ResMut<GlobalVisiblePointLights>,
mut views: Query<(
Entity,
&GlobalTransform,
&Camera,
&Frustum,
&ClusterConfig,
&mut Clusters,
Option<&RenderLayers>,
Option<&mut VisiblePointLights>,
)>,
point_lights_query: Query<(
Entity,
&GlobalTransform,
&PointLight,
Option<&RenderLayers>,
&ViewVisibility,
)>,
spot_lights_query: Query<(
Entity,
&GlobalTransform,
&SpotLight,
Option<&RenderLayers>,
&ViewVisibility,
)>,
mut lights: Local<Vec<PointLightAssignmentData>>,
mut cluster_aabb_spheres: Local<Vec<Option<Sphere>>>,
mut max_point_lights_warning_emitted: Local<bool>,
render_device: Option<Res<RenderDevice>>,
) {
let Some(render_device) = render_device else {
return;
};
global_lights.entities.clear();
lights.clear();
// collect just the relevant light query data into a persisted vec to avoid reallocating each frame
lights.extend(
point_lights_query
.iter()
.filter(|(.., visibility)| visibility.get())
.map(
|(entity, transform, point_light, maybe_layers, _visibility)| {
PointLightAssignmentData {
entity,
transform: GlobalTransform::from_translation(transform.translation()),
shadows_enabled: point_light.shadows_enabled,
range: point_light.range,
spot_light_angle: None,
render_layers: maybe_layers.unwrap_or_default().clone(),
}
},
),
);
lights.extend(
spot_lights_query
.iter()
.filter(|(.., visibility)| visibility.get())
.map(
|(entity, transform, spot_light, maybe_layers, _visibility)| {
PointLightAssignmentData {
entity,
transform: *transform,
shadows_enabled: spot_light.shadows_enabled,
range: spot_light.range,
spot_light_angle: Some(spot_light.outer_angle),
render_layers: maybe_layers.unwrap_or_default().clone(),
}
},
),
);
let clustered_forward_buffer_binding_type =
render_device.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT);
let supports_storage_buffers = matches!(
clustered_forward_buffer_binding_type,
BufferBindingType::Storage { .. }
);
if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !supports_storage_buffers {
lights.sort_by(|light_1, light_2| {
crate::point_light_order(
(
&light_1.entity,
&light_1.shadows_enabled,
&light_1.spot_light_angle.is_some(),
),
(
&light_2.entity,
&light_2.shadows_enabled,
&light_2.spot_light_angle.is_some(),
),
)
});
// check each light against each view's frustum, keep only those that affect at least one of our views
let frusta: Vec<_> = views
.iter()
.map(|(_, _, _, frustum, _, _, _, _)| *frustum)
.collect();
let mut lights_in_view_count = 0;
lights.retain(|light| {
// take one extra light to check if we should emit the warning
if lights_in_view_count == MAX_UNIFORM_BUFFER_POINT_LIGHTS + 1 {
false
} else {
let light_sphere = light.sphere();
let light_in_view = frusta
.iter()
.any(|frustum| frustum.intersects_sphere(&light_sphere, true));
if light_in_view {
lights_in_view_count += 1;
}
light_in_view
}
});
if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !*max_point_lights_warning_emitted {
warn!(
"MAX_UNIFORM_BUFFER_POINT_LIGHTS ({}) exceeded",
MAX_UNIFORM_BUFFER_POINT_LIGHTS
);
*max_point_lights_warning_emitted = true;
}
lights.truncate(MAX_UNIFORM_BUFFER_POINT_LIGHTS);
}
for (
view_entity,
camera_transform,
camera,
frustum,
config,
clusters,
maybe_layers,
mut visible_lights,
) in &mut views
{
let view_layers = maybe_layers.unwrap_or_default();
let clusters = clusters.into_inner();
if matches!(config, ClusterConfig::None) {
if visible_lights.is_some() {
commands.entity(view_entity).remove::<VisiblePointLights>();
}
clusters.clear();
continue;
}
let Some(screen_size) = camera.physical_viewport_size() else {
clusters.clear();
continue;
};
let mut requested_cluster_dimensions = config.dimensions_for_screen_size(screen_size);
let view_transform = camera_transform.compute_matrix();
let view_inv_scale = camera_transform.compute_transform().scale.recip();
let view_inv_scale_max = view_inv_scale.abs().max_element();
let inverse_view_transform = view_transform.inverse();
let is_orthographic = camera.projection_matrix().w_axis.w == 1.0;
let far_z = match config.far_z_mode() {
ClusterFarZMode::MaxLightRange => {
let inverse_view_row_2 = inverse_view_transform.row(2);
lights
.iter()
.map(|light| {
-inverse_view_row_2.dot(light.transform.translation().extend(1.0))
+ light.range * view_inv_scale.z
})
.reduce(f32::max)
.unwrap_or(0.0)
}
ClusterFarZMode::Constant(far) => far,
};
let first_slice_depth = match (is_orthographic, requested_cluster_dimensions.z) {
(true, _) => {
// NOTE: Based on glam's Mat4::orthographic_rh(), as used to calculate the orthographic projection
// matrix, we can calculate the projection's view-space near plane as follows:
// component 3,2 = r * near and 2,2 = r where r = 1.0 / (near - far)
// There is a caveat here that when calculating the projection matrix, near and far were swapped to give
// reversed z, consistent with the perspective projection. So,
// 3,2 = r * far and 2,2 = r where r = 1.0 / (far - near)
// rearranging r = 1.0 / (far - near), r * (far - near) = 1.0, r * far - 1.0 = r * near, near = (r * far - 1.0) / r
// = (3,2 - 1.0) / 2,2
(camera.projection_matrix().w_axis.z - 1.0) / camera.projection_matrix().z_axis.z
}
(false, 1) => config.first_slice_depth().max(far_z),
_ => config.first_slice_depth(),
};
let first_slice_depth = first_slice_depth * view_inv_scale.z;
// NOTE: Ensure the far_z is at least as far as the first_depth_slice to avoid clustering problems.
let far_z = far_z.max(first_slice_depth);
let cluster_factors = crate::calculate_cluster_factors(
first_slice_depth,
far_z,
requested_cluster_dimensions.z as f32,
is_orthographic,
);
if config.dynamic_resizing() {
let mut cluster_index_estimate = 0.0;
for light in &lights {
let light_sphere = light.sphere();
// Check if the light is within the view frustum
if !frustum.intersects_sphere(&light_sphere, true) {
continue;
}
// calculate a conservative aabb estimate of number of clusters affected by this light
// this overestimates index counts by at most 50% (and typically much less) when the whole light range is in view
// it can overestimate more significantly when light ranges are only partially in view
let (light_aabb_min, light_aabb_max) = cluster_space_light_aabb(
inverse_view_transform,
view_inv_scale,
camera.projection_matrix(),
&light_sphere,
);
// since we won't adjust z slices we can calculate exact number of slices required in z dimension
let z_cluster_min = view_z_to_z_slice(
cluster_factors,
requested_cluster_dimensions.z,
light_aabb_min.z,
is_orthographic,
);
let z_cluster_max = view_z_to_z_slice(
cluster_factors,
requested_cluster_dimensions.z,
light_aabb_max.z,
is_orthographic,
);
let z_count =
z_cluster_min.max(z_cluster_max) - z_cluster_min.min(z_cluster_max) + 1;
// calculate x/y count using floats to avoid overestimating counts due to large initial tile sizes
let xy_min = light_aabb_min.xy();
let xy_max = light_aabb_max.xy();
// multiply by 0.5 to move from [-1,1] to [-0.5, 0.5], max extent of 1 in each dimension
let xy_count = (xy_max - xy_min)
* 0.5
* Vec2::new(
requested_cluster_dimensions.x as f32,
requested_cluster_dimensions.y as f32,
);
// add up to 2 to each axis to account for overlap
let x_overlap = if xy_min.x <= -1.0 { 0.0 } else { 1.0 }
+ if xy_max.x >= 1.0 { 0.0 } else { 1.0 };
let y_overlap = if xy_min.y <= -1.0 { 0.0 } else { 1.0 }
+ if xy_max.y >= 1.0 { 0.0 } else { 1.0 };
cluster_index_estimate +=
(xy_count.x + x_overlap) * (xy_count.y + y_overlap) * z_count as f32;
}
if cluster_index_estimate > ViewClusterBindings::MAX_INDICES as f32 {
// scale x and y cluster count to be able to fit all our indices
// we take the ratio of the actual indices over the index estimate.
// this is not guaranteed to be small enough due to overlapped tiles, but
// the conservative estimate is more than sufficient to cover the
// difference
let index_ratio = ViewClusterBindings::MAX_INDICES as f32 / cluster_index_estimate;
let xy_ratio = index_ratio.sqrt();
requested_cluster_dimensions.x =
((requested_cluster_dimensions.x as f32 * xy_ratio).floor() as u32).max(1);
requested_cluster_dimensions.y =
((requested_cluster_dimensions.y as f32 * xy_ratio).floor() as u32).max(1);
}
}
clusters.update(screen_size, requested_cluster_dimensions);
clusters.near = first_slice_depth;
clusters.far = far_z;
// NOTE: Maximum 4096 clusters due to uniform buffer size constraints
debug_assert!(
clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096
);
let inverse_projection = camera.projection_matrix().inverse();
for lights in &mut clusters.lights {
lights.entities.clear();
lights.point_light_count = 0;
lights.spot_light_count = 0;
}
let cluster_count =
(clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z) as usize;
clusters
.lights
.resize_with(cluster_count, VisiblePointLights::default);
// initialize empty cluster bounding spheres
cluster_aabb_spheres.clear();
cluster_aabb_spheres.extend(std::iter::repeat(None).take(cluster_count));
// Calculate the x/y/z cluster frustum planes in view space
let mut x_planes = Vec::with_capacity(clusters.dimensions.x as usize + 1);
let mut y_planes = Vec::with_capacity(clusters.dimensions.y as usize + 1);
let mut z_planes = Vec::with_capacity(clusters.dimensions.z as usize + 1);
if is_orthographic {
let x_slices = clusters.dimensions.x as f32;
for x in 0..=clusters.dimensions.x {
let x_proportion = x as f32 / x_slices;
let x_pos = x_proportion * 2.0 - 1.0;
let view_x = clip_to_view(inverse_projection, Vec4::new(x_pos, 0.0, 1.0, 1.0)).x;
let normal = Vec3::X;
let d = view_x * normal.x;
x_planes.push(HalfSpace::new(normal.extend(d)));
}
let y_slices = clusters.dimensions.y as f32;
for y in 0..=clusters.dimensions.y {
let y_proportion = 1.0 - y as f32 / y_slices;
let y_pos = y_proportion * 2.0 - 1.0;
let view_y = clip_to_view(inverse_projection, Vec4::new(0.0, y_pos, 1.0, 1.0)).y;
let normal = Vec3::Y;
let d = view_y * normal.y;
y_planes.push(HalfSpace::new(normal.extend(d)));
}
} else {
let x_slices = clusters.dimensions.x as f32;
for x in 0..=clusters.dimensions.x {
let x_proportion = x as f32 / x_slices;
let x_pos = x_proportion * 2.0 - 1.0;
let nb = clip_to_view(inverse_projection, Vec4::new(x_pos, -1.0, 1.0, 1.0)).xyz();
let nt = clip_to_view(inverse_projection, Vec4::new(x_pos, 1.0, 1.0, 1.0)).xyz();
let normal = nb.cross(nt);
let d = nb.dot(normal);
x_planes.push(HalfSpace::new(normal.extend(d)));
}
let y_slices = clusters.dimensions.y as f32;
for y in 0..=clusters.dimensions.y {
let y_proportion = 1.0 - y as f32 / y_slices;
let y_pos = y_proportion * 2.0 - 1.0;
let nl = clip_to_view(inverse_projection, Vec4::new(-1.0, y_pos, 1.0, 1.0)).xyz();
let nr = clip_to_view(inverse_projection, Vec4::new(1.0, y_pos, 1.0, 1.0)).xyz();
let normal = nr.cross(nl);
let d = nr.dot(normal);
y_planes.push(HalfSpace::new(normal.extend(d)));
}
}
let z_slices = clusters.dimensions.z;
for z in 0..=z_slices {
let view_z = z_slice_to_view_z(first_slice_depth, far_z, z_slices, z, is_orthographic);
let normal = -Vec3::Z;
let d = view_z * normal.z;
z_planes.push(HalfSpace::new(normal.extend(d)));
}
let mut update_from_light_intersections = |visible_lights: &mut Vec<Entity>| {
for light in &lights {
// check if the light layers overlap the view layers
if !view_layers.intersects(&light.render_layers) {
continue;
}
let light_sphere = light.sphere();
// Check if the light is within the view frustum
if !frustum.intersects_sphere(&light_sphere, true) {
continue;
}
// NOTE: The light intersects the frustum so it must be visible and part of the global set
global_lights.entities.insert(light.entity);
visible_lights.push(light.entity);
// note: caching seems to be slower than calling twice for this aabb calculation
let (light_aabb_xy_ndc_z_view_min, light_aabb_xy_ndc_z_view_max) =
cluster_space_light_aabb(
inverse_view_transform,
view_inv_scale,
camera.projection_matrix(),
&light_sphere,
);
let min_cluster = ndc_position_to_cluster(
clusters.dimensions,
cluster_factors,
is_orthographic,
light_aabb_xy_ndc_z_view_min,
light_aabb_xy_ndc_z_view_min.z,
);
let max_cluster = ndc_position_to_cluster(
clusters.dimensions,
cluster_factors,
is_orthographic,
light_aabb_xy_ndc_z_view_max,
light_aabb_xy_ndc_z_view_max.z,
);
let (min_cluster, max_cluster) =
(min_cluster.min(max_cluster), min_cluster.max(max_cluster));
// What follows is the Iterative Sphere Refinement algorithm from Just Cause 3
// Persson et al, Practical Clustered Shading
// http://newq.net/dl/pub/s2015_practical.pdf
// NOTE: A sphere under perspective projection is no longer a sphere. It gets
// stretched and warped, which prevents simpler algorithms from being correct
// as they often assume that the widest part of the sphere under projection is the
// center point on the axis of interest plus the radius, and that is not true!
let view_light_sphere = Sphere {
center: Vec3A::from(inverse_view_transform * light_sphere.center.extend(1.0)),
radius: light_sphere.radius * view_inv_scale_max,
};
let spot_light_dir_sin_cos = light.spot_light_angle.map(|angle| {
let (angle_sin, angle_cos) = angle.sin_cos();
(
(inverse_view_transform * light.transform.back().extend(0.0))
.truncate()
.normalize(),
angle_sin,
angle_cos,
)
});
let light_center_clip =
camera.projection_matrix() * view_light_sphere.center.extend(1.0);
let light_center_ndc = light_center_clip.xyz() / light_center_clip.w;
let cluster_coordinates = ndc_position_to_cluster(
clusters.dimensions,
cluster_factors,
is_orthographic,
light_center_ndc,
view_light_sphere.center.z,
);
let z_center = if light_center_ndc.z <= 1.0 {
Some(cluster_coordinates.z)
} else {
None
};
let y_center = if light_center_ndc.y > 1.0 {
None
} else if light_center_ndc.y < -1.0 {
Some(clusters.dimensions.y + 1)
} else {
Some(cluster_coordinates.y)
};
for z in min_cluster.z..=max_cluster.z {
let mut z_light = view_light_sphere.clone();
if z_center.is_none() || z != z_center.unwrap() {
// The z plane closer to the light has the larger radius circle where the
// light sphere intersects the z plane.
let z_plane = if z_center.is_some() && z < z_center.unwrap() {
z_planes[(z + 1) as usize]
} else {
z_planes[z as usize]
};
// Project the sphere to this z plane and use its radius as the radius of a
// new, refined sphere.
if let Some(projected) = project_to_plane_z(z_light, z_plane) {
z_light = projected;
} else {
continue;
}
}
for y in min_cluster.y..=max_cluster.y {
let mut y_light = z_light.clone();
if y_center.is_none() || y != y_center.unwrap() {
// The y plane closer to the light has the larger radius circle where the
// light sphere intersects the y plane.
let y_plane = if y_center.is_some() && y < y_center.unwrap() {
y_planes[(y + 1) as usize]
} else {
y_planes[y as usize]
};
// Project the refined sphere to this y plane and use its radius as the
// radius of a new, even more refined sphere.
if let Some(projected) =
project_to_plane_y(y_light, y_plane, is_orthographic)
{
y_light = projected;
} else {
continue;
}
}
// Loop from the left to find the first affected cluster
let mut min_x = min_cluster.x;
loop {
if min_x >= max_cluster.x
|| -get_distance_x(
x_planes[(min_x + 1) as usize],
y_light.center,
is_orthographic,
) + y_light.radius
> 0.0
{
break;
}
min_x += 1;
}
// Loop from the right to find the last affected cluster
let mut max_x = max_cluster.x;
loop {
if max_x <= min_x
|| get_distance_x(
x_planes[max_x as usize],
y_light.center,
is_orthographic,
) + y_light.radius
> 0.0
{
break;
}
max_x -= 1;
}
let mut cluster_index = ((y * clusters.dimensions.x + min_x)
* clusters.dimensions.z
+ z) as usize;
if let Some((view_light_direction, angle_sin, angle_cos)) =
spot_light_dir_sin_cos
{
for x in min_x..=max_x {
// further culling for spot lights
// get or initialize cluster bounding sphere
let cluster_aabb_sphere = &mut cluster_aabb_spheres[cluster_index];
let cluster_aabb_sphere = if let Some(sphere) = cluster_aabb_sphere
{
&*sphere
} else {
let aabb = compute_aabb_for_cluster(
first_slice_depth,
far_z,
clusters.tile_size.as_vec2(),
screen_size.as_vec2(),
inverse_projection,
is_orthographic,
clusters.dimensions,
UVec3::new(x, y, z),
);
let sphere = Sphere {
center: aabb.center,
radius: aabb.half_extents.length(),
};
*cluster_aabb_sphere = Some(sphere);
cluster_aabb_sphere.as_ref().unwrap()
};
// test -- based on https://bartwronski.com/2017/04/13/cull-that-cone/
let spot_light_offset = Vec3::from(
view_light_sphere.center - cluster_aabb_sphere.center,
);
let spot_light_dist_sq = spot_light_offset.length_squared();
let v1_len = spot_light_offset.dot(view_light_direction);
let distance_closest_point = (angle_cos
* (spot_light_dist_sq - v1_len * v1_len).sqrt())
- v1_len * angle_sin;
let angle_cull =
distance_closest_point > cluster_aabb_sphere.radius;
let front_cull = v1_len
> cluster_aabb_sphere.radius + light.range * view_inv_scale_max;
let back_cull = v1_len < -cluster_aabb_sphere.radius;
if !angle_cull && !front_cull && !back_cull {
// this cluster is affected by the spot light
clusters.lights[cluster_index].entities.push(light.entity);
clusters.lights[cluster_index].spot_light_count += 1;
}
cluster_index += clusters.dimensions.z as usize;
}
} else {
for _ in min_x..=max_x {
// all clusters within range are affected by point lights
clusters.lights[cluster_index].entities.push(light.entity);
clusters.lights[cluster_index].point_light_count += 1;
cluster_index += clusters.dimensions.z as usize;
}
}
}
}
}
};
// reuse existing visible lights Vec, if it exists
if let Some(visible_lights) = visible_lights.as_mut() {
visible_lights.entities.clear();
update_from_light_intersections(&mut visible_lights.entities);
} else {
let mut entities = Vec::new();
update_from_light_intersections(&mut entities);
commands.entity(view_entity).insert(VisiblePointLights {
entities,
..Default::default()
});
}
}
}
#[allow(clippy::too_many_arguments)]
fn compute_aabb_for_cluster(
z_near: f32,
z_far: f32,
tile_size: Vec2,
screen_size: Vec2,
inverse_projection: Mat4,
is_orthographic: bool,
cluster_dimensions: UVec3,
ijk: UVec3,
) -> Aabb {
let ijk = ijk.as_vec3();
// Calculate the minimum and maximum points in screen space
let p_min = ijk.xy() * tile_size;
let p_max = p_min + tile_size;
let cluster_min;
let cluster_max;
if is_orthographic {
// Use linear depth slicing for orthographic
// Convert to view space at the cluster near and far planes
// NOTE: 1.0 is the near plane due to using reverse z projections
let mut p_min = screen_to_view(screen_size, inverse_projection, p_min, 0.0).xyz();
let mut p_max = screen_to_view(screen_size, inverse_projection, p_max, 0.0).xyz();
// calculate cluster depth using z_near and z_far
p_min.z = -z_near + (z_near - z_far) * ijk.z / cluster_dimensions.z as f32;
p_max.z = -z_near + (z_near - z_far) * (ijk.z + 1.0) / cluster_dimensions.z as f32;
cluster_min = p_min.min(p_max);
cluster_max = p_min.max(p_max);
} else {
// Convert to view space at the near plane
// NOTE: 1.0 is the near plane due to using reverse z projections
let p_min = screen_to_view(screen_size, inverse_projection, p_min, 1.0);
let p_max = screen_to_view(screen_size, inverse_projection, p_max, 1.0);
let z_far_over_z_near = -z_far / -z_near;
let cluster_near = if ijk.z == 0.0 {
0.0
} else {
-z_near * z_far_over_z_near.powf((ijk.z - 1.0) / (cluster_dimensions.z - 1) as f32)
};
// NOTE: This could be simplified to:
// cluster_far = cluster_near * z_far_over_z_near;
let cluster_far = if cluster_dimensions.z == 1 {
-z_far
} else {
-z_near * z_far_over_z_near.powf(ijk.z / (cluster_dimensions.z - 1) as f32)
};
// Calculate the four intersection points of the min and max points with the cluster near and far planes
let p_min_near = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_near);
let p_min_far = line_intersection_to_z_plane(Vec3::ZERO, p_min.xyz(), cluster_far);
let p_max_near = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_near);
let p_max_far = line_intersection_to_z_plane(Vec3::ZERO, p_max.xyz(), cluster_far);
cluster_min = p_min_near.min(p_min_far).min(p_max_near.min(p_max_far));
cluster_max = p_min_near.max(p_min_far).max(p_max_near.max(p_max_far));
}
Aabb::from_min_max(cluster_min, cluster_max)
}
// NOTE: Keep in sync as the inverse of view_z_to_z_slice above
fn z_slice_to_view_z(
near: f32,
far: f32,
z_slices: u32,
z_slice: u32,
is_orthographic: bool,
) -> f32 {
if is_orthographic {
return -near - (far - near) * z_slice as f32 / z_slices as f32;
}
// Perspective
if z_slice == 0 {
0.0
} else {
-near * (far / near).powf((z_slice - 1) as f32 / (z_slices - 1) as f32)
}
}
fn ndc_position_to_cluster(
cluster_dimensions: UVec3,
cluster_factors: Vec2,
is_orthographic: bool,
ndc_p: Vec3,
view_z: f32,
) -> UVec3 {
let cluster_dimensions_f32 = cluster_dimensions.as_vec3();
let frag_coord = (ndc_p.xy() * VEC2_HALF_NEGATIVE_Y + VEC2_HALF).clamp(Vec2::ZERO, Vec2::ONE);
let xy = (frag_coord * cluster_dimensions_f32.xy()).floor();
let z_slice = view_z_to_z_slice(
cluster_factors,
cluster_dimensions.z,
view_z,
is_orthographic,
);
xy.as_uvec2()
.extend(z_slice)
.clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE)
}
/// Calculate bounds for the light using a view space aabb.
/// Returns a `(Vec3, Vec3)` containing minimum and maximum with
/// `X` and `Y` in normalized device coordinates with range `[-1, 1]`
/// `Z` in view space, with range `[-inf, -f32::MIN_POSITIVE]`
fn cluster_space_light_aabb(
inverse_view_transform: Mat4,
view_inv_scale: Vec3,
projection_matrix: Mat4,
light_sphere: &Sphere,
) -> (Vec3, Vec3) {
let light_aabb_view = Aabb {
center: Vec3A::from(inverse_view_transform * light_sphere.center.extend(1.0)),
half_extents: Vec3A::from(light_sphere.radius * view_inv_scale.abs()),
};
let (mut light_aabb_view_min, mut light_aabb_view_max) =
(light_aabb_view.min(), light_aabb_view.max());
// Constrain view z to be negative - i.e. in front of the camera
// When view z is >= 0.0 and we're using a perspective projection, bad things happen.
// At view z == 0.0, ndc x,y are mathematically undefined. At view z > 0.0, i.e. behind the camera,
// the perspective projection flips the directions of the axes. This breaks assumptions about
// use of min/max operations as something that was to the left in view space is now returning a
// coordinate that for view z in front of the camera would be on the right, but at view z behind the
// camera is on the left. So, we just constrain view z to be < 0.0 and necessarily in front of the camera.
light_aabb_view_min.z = light_aabb_view_min.z.min(-f32::MIN_POSITIVE);
light_aabb_view_max.z = light_aabb_view_max.z.min(-f32::MIN_POSITIVE);
// Is there a cheaper way to do this? The problem is that because of perspective
// the point at max z but min xy may be less xy in screenspace, and similar. As
// such, projecting the min and max xy at both the closer and further z and taking
// the min and max of those projected points addresses this.
let (
light_aabb_view_xymin_near,
light_aabb_view_xymin_far,
light_aabb_view_xymax_near,
light_aabb_view_xymax_far,
) = (
light_aabb_view_min,
light_aabb_view_min.xy().extend(light_aabb_view_max.z),
light_aabb_view_max.xy().extend(light_aabb_view_min.z),
light_aabb_view_max,
);
let (
light_aabb_clip_xymin_near,
light_aabb_clip_xymin_far,
light_aabb_clip_xymax_near,
light_aabb_clip_xymax_far,
) = (
projection_matrix * light_aabb_view_xymin_near.extend(1.0),
projection_matrix * light_aabb_view_xymin_far.extend(1.0),
projection_matrix * light_aabb_view_xymax_near.extend(1.0),
projection_matrix * light_aabb_view_xymax_far.extend(1.0),
);
let (
light_aabb_ndc_xymin_near,
light_aabb_ndc_xymin_far,
light_aabb_ndc_xymax_near,
light_aabb_ndc_xymax_far,
) = (
light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w,
light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w,
light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w,
light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w,
);
let (light_aabb_ndc_min, light_aabb_ndc_max) = (
light_aabb_ndc_xymin_near
.min(light_aabb_ndc_xymin_far)
.min(light_aabb_ndc_xymax_near)
.min(light_aabb_ndc_xymax_far),
light_aabb_ndc_xymin_near
.max(light_aabb_ndc_xymin_far)
.max(light_aabb_ndc_xymax_near)
.max(light_aabb_ndc_xymax_far),
);
// clamp to ndc coords without depth
let (aabb_min_ndc, aabb_max_ndc) = (
light_aabb_ndc_min.xy().clamp(NDC_MIN, NDC_MAX),
light_aabb_ndc_max.xy().clamp(NDC_MIN, NDC_MAX),
);
// pack unadjusted z depth into the vecs
(
aabb_min_ndc.extend(light_aabb_view_min.z),
aabb_max_ndc.extend(light_aabb_view_max.z),
)
}
// Calculate the intersection of a ray from the eye through the view space position to a z plane
fn line_intersection_to_z_plane(origin: Vec3, p: Vec3, z: f32) -> Vec3 {
let v = p - origin;
let t = (z - Vec3::Z.dot(origin)) / Vec3::Z.dot(v);
origin + t * v
}
// NOTE: Keep in sync with bevy_pbr/src/render/pbr.wgsl
fn view_z_to_z_slice(
cluster_factors: Vec2,
z_slices: u32,
view_z: f32,
is_orthographic: bool,
) -> u32 {
let z_slice = if is_orthographic {
// NOTE: view_z is correct in the orthographic case
((view_z - cluster_factors.x) * cluster_factors.y).floor() as u32
} else {
// NOTE: had to use -view_z to make it positive else log(negative) is nan
((-view_z).ln() * cluster_factors.x - cluster_factors.y + 1.0) as u32
};
// NOTE: We use min as we may limit the far z plane used for clustering to be closer than
// the furthest thing being drawn. This means that we need to limit to the maximum cluster.
z_slice.min(z_slices - 1)
}
fn clip_to_view(inverse_projection: Mat4, clip: Vec4) -> Vec4 {
let view = inverse_projection * clip;
view / view.w
}
fn screen_to_view(screen_size: Vec2, inverse_projection: Mat4, screen: Vec2, ndc_z: f32) -> Vec4 {
let tex_coord = screen / screen_size;
let clip = Vec4::new(
tex_coord.x * 2.0 - 1.0,
(1.0 - tex_coord.y) * 2.0 - 1.0,
ndc_z,
1.0,
);
clip_to_view(inverse_projection, clip)
}
// NOTE: This exploits the fact that a x-plane normal has only x and z components
fn get_distance_x(plane: HalfSpace, point: Vec3A, is_orthographic: bool) -> f32 {
if is_orthographic {
point.x - plane.d()
} else {
// Distance from a point to a plane:
// signed distance to plane = (nx * px + ny * py + nz * pz + d) / n.length()
// NOTE: For a x-plane, ny and d are 0 and we have a unit normal
// = nx * px + nz * pz
plane.normal_d().xz().dot(point.xz())
}
}
// NOTE: This exploits the fact that a z-plane normal has only a z component
fn project_to_plane_z(z_light: Sphere, z_plane: HalfSpace) -> Option<Sphere> {
// p = sphere center
// n = plane normal
// d = n.p if p is in the plane
// NOTE: For a z-plane, nx and ny are both 0
// d = px * nx + py * ny + pz * nz
// = pz * nz
// => pz = d / nz
let z = z_plane.d() / z_plane.normal_d().z;
let distance_to_plane = z - z_light.center.z;
if distance_to_plane.abs() > z_light.radius {
return None;
}
Some(Sphere {
center: Vec3A::from(z_light.center.xy().extend(z)),
// hypotenuse length = radius
// pythagoras = (distance to plane)^2 + b^2 = radius^2
radius: (z_light.radius * z_light.radius - distance_to_plane * distance_to_plane).sqrt(),
})
}
// NOTE: This exploits the fact that a y-plane normal has only y and z components
fn project_to_plane_y(
y_light: Sphere,
y_plane: HalfSpace,
is_orthographic: bool,
) -> Option<Sphere> {
let distance_to_plane = if is_orthographic {
y_plane.d() - y_light.center.y
} else {
-y_light.center.yz().dot(y_plane.normal_d().yz())
};
if distance_to_plane.abs() > y_light.radius {
return None;
}
Some(Sphere {
center: y_light.center + distance_to_plane * y_plane.normal(),
radius: (y_light.radius * y_light.radius - distance_to_plane * distance_to_plane).sqrt(),
})
}

View file

@ -0,0 +1,803 @@
//! Spatial clustering of objects, currently just point and spot lights.
use std::num::NonZeroU64;
use bevy_ecs::{
component::Component,
entity::{Entity, EntityHashMap},
query::Without,
reflect::ReflectComponent,
system::{Commands, Query, Res, Resource},
world::{FromWorld, World},
};
use bevy_math::{AspectRatio, UVec2, UVec3, UVec4, Vec3Swizzles as _, Vec4};
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
use bevy_render::{
camera::Camera,
render_resource::{
BindingResource, BufferBindingType, ShaderSize as _, ShaderType, StorageBuffer,
UniformBuffer,
},
renderer::{RenderDevice, RenderQueue},
Extract,
};
use bevy_utils::{hashbrown::HashSet, tracing::warn};
pub(crate) use crate::cluster::assign::assign_lights_to_clusters;
use crate::MeshPipeline;
mod assign;
#[cfg(test)]
mod test;
// NOTE: this must be kept in sync with the same constants in pbr.frag
pub const MAX_UNIFORM_BUFFER_POINT_LIGHTS: usize = 256;
// NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that
// at least that many are supported using this constant and SupportedBindingType::from_device()
pub const CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT: u32 = 3;
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl
// and must be large enough to contain MAX_UNIFORM_BUFFER_POINT_LIGHTS
const CLUSTER_COUNT_SIZE: u32 = 9;
const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - (CLUSTER_COUNT_SIZE * 2))) - 1;
const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1;
// Clustered-forward rendering notes
// The main initial reference material used was this rather accessible article:
// http://www.aortiz.me/2018/12/21/CG.html
// Some inspiration was taken from “Practical Clustered Shading” which is part 2 of:
// https://efficientshading.com/2015/01/01/real-time-many-light-management-and-shadows-with-clustered-shading/
// (Also note that Part 3 of the above shows how we could support the shadow mapping for many lights.)
// The z-slicing method mentioned in the aortiz article is originally from Tiago Sousa's Siggraph 2016 talk about Doom 2016:
// http://advances.realtimerendering.com/s2016/Siggraph2016_idTech6.pdf
/// Configure the far z-plane mode used for the furthest depth slice for clustered forward
/// rendering
#[derive(Debug, Copy, Clone, Reflect)]
pub enum ClusterFarZMode {
/// Calculate the required maximum z-depth based on currently visible lights.
/// Makes better use of available clusters, speeding up GPU lighting operations
/// at the expense of some CPU time and using more indices in the cluster light
/// index lists.
MaxLightRange,
/// Constant max z-depth
Constant(f32),
}
/// Configure the depth-slicing strategy for clustered forward rendering
#[derive(Debug, Copy, Clone, Reflect)]
#[reflect(Default)]
pub struct ClusterZConfig {
/// Far `Z` plane of the first depth slice
pub first_slice_depth: f32,
/// Strategy for how to evaluate the far `Z` plane of the furthest depth slice
pub far_z_mode: ClusterFarZMode,
}
/// Configuration of the clustering strategy for clustered forward rendering
#[derive(Debug, Copy, Clone, Component, Reflect)]
#[reflect(Component)]
pub enum ClusterConfig {
/// Disable light cluster calculations for this view
None,
/// One single cluster. Optimal for low-light complexity scenes or scenes where
/// most lights affect the entire scene.
Single,
/// Explicit `X`, `Y` and `Z` counts (may yield non-square `X/Y` clusters depending on the aspect ratio)
XYZ {
dimensions: UVec3,
z_config: ClusterZConfig,
/// Specify if clusters should automatically resize in `X/Y` if there is a risk of exceeding
/// the available cluster-light index limit
dynamic_resizing: bool,
},
/// Fixed number of `Z` slices, `X` and `Y` calculated to give square clusters
/// with at most total clusters. For top-down games where lights will generally always be within a
/// short depth range, it may be useful to use this configuration with 1 or few `Z` slices. This
/// would reduce the number of lights per cluster by distributing more clusters in screen space
/// `X/Y` which matches how lights are distributed in the scene.
FixedZ {
total: u32,
z_slices: u32,
z_config: ClusterZConfig,
/// Specify if clusters should automatically resize in `X/Y` if there is a risk of exceeding
/// the available cluster-light index limit
dynamic_resizing: bool,
},
}
#[derive(Component, Debug, Default)]
pub struct Clusters {
/// Tile size
pub(crate) tile_size: UVec2,
/// Number of clusters in `X` / `Y` / `Z` in the view frustum
pub(crate) dimensions: UVec3,
/// Distance to the far plane of the first depth slice. The first depth slice is special
/// and explicitly-configured to avoid having unnecessarily many slices close to the camera.
pub(crate) near: f32,
pub(crate) far: f32,
pub(crate) lights: Vec<VisiblePointLights>,
}
#[derive(Clone, Component, Debug, Default)]
pub struct VisiblePointLights {
pub(crate) entities: Vec<Entity>,
pub point_light_count: usize,
pub spot_light_count: usize,
}
#[derive(Resource, Default)]
pub struct GlobalVisiblePointLights {
pub(crate) entities: HashSet<Entity>,
}
#[derive(Resource)]
pub struct GlobalLightMeta {
pub gpu_point_lights: GpuPointLights,
pub entity_to_index: EntityHashMap<usize>,
}
#[derive(Copy, Clone, ShaderType, Default, Debug)]
pub struct GpuPointLight {
// For point lights: the lower-right 2x2 values of the projection matrix [2][2] [2][3] [3][2] [3][3]
// For spot lights: 2 components of the direction (x,z), spot_scale and spot_offset
pub(crate) light_custom_data: Vec4,
pub(crate) color_inverse_square_range: Vec4,
pub(crate) position_radius: Vec4,
pub(crate) flags: u32,
pub(crate) shadow_depth_bias: f32,
pub(crate) shadow_normal_bias: f32,
pub(crate) spot_light_tan_angle: f32,
}
pub enum GpuPointLights {
Uniform(UniformBuffer<GpuPointLightsUniform>),
Storage(StorageBuffer<GpuPointLightsStorage>),
}
#[derive(ShaderType)]
pub struct GpuPointLightsUniform {
data: Box<[GpuPointLight; MAX_UNIFORM_BUFFER_POINT_LIGHTS]>,
}
#[derive(ShaderType, Default)]
pub struct GpuPointLightsStorage {
#[size(runtime)]
data: Vec<GpuPointLight>,
}
#[derive(Component)]
pub struct ExtractedClusterConfig {
/// Special near value for cluster calculations
pub(crate) near: f32,
pub(crate) far: f32,
/// Number of clusters in `X` / `Y` / `Z` in the view frustum
pub(crate) dimensions: UVec3,
}
enum ExtractedClustersPointLightsElement {
ClusterHeader(u32, u32),
LightEntity(Entity),
}
#[derive(Component)]
pub struct ExtractedClustersPointLights {
data: Vec<ExtractedClustersPointLightsElement>,
}
#[derive(ShaderType)]
struct GpuClusterOffsetsAndCountsUniform {
data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
}
#[derive(ShaderType, Default)]
struct GpuClusterLightIndexListsStorage {
#[size(runtime)]
data: Vec<u32>,
}
#[derive(ShaderType, Default)]
struct GpuClusterOffsetsAndCountsStorage {
#[size(runtime)]
data: Vec<UVec4>,
}
enum ViewClusterBuffers {
Uniform {
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
cluster_light_index_lists: UniformBuffer<GpuClusterLightIndexListsUniform>,
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
cluster_offsets_and_counts: UniformBuffer<GpuClusterOffsetsAndCountsUniform>,
},
Storage {
cluster_light_index_lists: StorageBuffer<GpuClusterLightIndexListsStorage>,
cluster_offsets_and_counts: StorageBuffer<GpuClusterOffsetsAndCountsStorage>,
},
}
#[derive(Component)]
pub struct ViewClusterBindings {
n_indices: usize,
n_offsets: usize,
buffers: ViewClusterBuffers,
}
impl Default for ClusterZConfig {
fn default() -> Self {
Self {
first_slice_depth: 5.0,
far_z_mode: ClusterFarZMode::MaxLightRange,
}
}
}
impl Default for ClusterConfig {
fn default() -> Self {
// 24 depth slices, square clusters with at most 4096 total clusters
// use max light distance as clusters max `Z`-depth, first slice extends to 5.0
Self::FixedZ {
total: 4096,
z_slices: 24,
z_config: ClusterZConfig::default(),
dynamic_resizing: true,
}
}
}
impl ClusterConfig {
fn dimensions_for_screen_size(&self, screen_size: UVec2) -> UVec3 {
match &self {
ClusterConfig::None => UVec3::ZERO,
ClusterConfig::Single => UVec3::ONE,
ClusterConfig::XYZ { dimensions, .. } => *dimensions,
ClusterConfig::FixedZ {
total, z_slices, ..
} => {
let aspect_ratio: f32 =
AspectRatio::from_pixels(screen_size.x, screen_size.y).into();
let mut z_slices = *z_slices;
if *total < z_slices {
warn!("ClusterConfig has more z-slices than total clusters!");
z_slices = *total;
}
let per_layer = *total as f32 / z_slices as f32;
let y = f32::sqrt(per_layer / aspect_ratio);
let mut x = (y * aspect_ratio) as u32;
let mut y = y as u32;
// check extremes
if x == 0 {
x = 1;
y = per_layer as u32;
}
if y == 0 {
x = per_layer as u32;
y = 1;
}
UVec3::new(x, y, z_slices)
}
}
}
fn first_slice_depth(&self) -> f32 {
match self {
ClusterConfig::None | ClusterConfig::Single => 0.0,
ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => {
z_config.first_slice_depth
}
}
}
fn far_z_mode(&self) -> ClusterFarZMode {
match self {
ClusterConfig::None => ClusterFarZMode::Constant(0.0),
ClusterConfig::Single => ClusterFarZMode::MaxLightRange,
ClusterConfig::XYZ { z_config, .. } | ClusterConfig::FixedZ { z_config, .. } => {
z_config.far_z_mode
}
}
}
fn dynamic_resizing(&self) -> bool {
match self {
ClusterConfig::None | ClusterConfig::Single => false,
ClusterConfig::XYZ {
dynamic_resizing, ..
}
| ClusterConfig::FixedZ {
dynamic_resizing, ..
} => *dynamic_resizing,
}
}
}
impl Clusters {
fn update(&mut self, screen_size: UVec2, requested_dimensions: UVec3) {
debug_assert!(
requested_dimensions.x > 0 && requested_dimensions.y > 0 && requested_dimensions.z > 0
);
let tile_size = (screen_size.as_vec2() / requested_dimensions.xy().as_vec2())
.ceil()
.as_uvec2()
.max(UVec2::ONE);
self.tile_size = tile_size;
self.dimensions = (screen_size.as_vec2() / tile_size.as_vec2())
.ceil()
.as_uvec2()
.extend(requested_dimensions.z)
.max(UVec3::ONE);
// NOTE: Maximum 4096 clusters due to uniform buffer size constraints
debug_assert!(self.dimensions.x * self.dimensions.y * self.dimensions.z <= 4096);
}
fn clear(&mut self) {
self.tile_size = UVec2::ONE;
self.dimensions = UVec3::ZERO;
self.near = 0.0;
self.far = 0.0;
self.lights.clear();
}
}
pub fn add_clusters(
mut commands: Commands,
cameras: Query<(Entity, Option<&ClusterConfig>, &Camera), Without<Clusters>>,
) {
for (entity, config, camera) in &cameras {
if !camera.is_active {
continue;
}
let config = config.copied().unwrap_or_default();
// actual settings here don't matter - they will be overwritten in assign_lights_to_clusters
commands
.entity(entity)
.insert((Clusters::default(), config));
}
}
impl VisiblePointLights {
#[inline]
pub fn iter(&self) -> impl DoubleEndedIterator<Item = &Entity> {
self.entities.iter()
}
#[inline]
pub fn len(&self) -> usize {
self.entities.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.entities.is_empty()
}
}
impl GlobalVisiblePointLights {
#[inline]
pub fn iter(&self) -> impl Iterator<Item = &Entity> {
self.entities.iter()
}
#[inline]
pub fn contains(&self, entity: Entity) -> bool {
self.entities.contains(&entity)
}
}
impl FromWorld for GlobalLightMeta {
fn from_world(world: &mut World) -> Self {
Self::new(
world
.resource::<RenderDevice>()
.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT),
)
}
}
impl GlobalLightMeta {
pub fn new(buffer_binding_type: BufferBindingType) -> Self {
Self {
gpu_point_lights: GpuPointLights::new(buffer_binding_type),
entity_to_index: EntityHashMap::default(),
}
}
}
impl GpuPointLights {
fn new(buffer_binding_type: BufferBindingType) -> Self {
match buffer_binding_type {
BufferBindingType::Storage { .. } => Self::storage(),
BufferBindingType::Uniform => Self::uniform(),
}
}
fn uniform() -> Self {
Self::Uniform(UniformBuffer::default())
}
fn storage() -> Self {
Self::Storage(StorageBuffer::default())
}
pub(crate) fn set(&mut self, mut lights: Vec<GpuPointLight>) {
match self {
GpuPointLights::Uniform(buffer) => {
let len = lights.len().min(MAX_UNIFORM_BUFFER_POINT_LIGHTS);
let src = &lights[..len];
let dst = &mut buffer.get_mut().data[..len];
dst.copy_from_slice(src);
}
GpuPointLights::Storage(buffer) => {
buffer.get_mut().data.clear();
buffer.get_mut().data.append(&mut lights);
}
}
}
pub(crate) fn write_buffer(
&mut self,
render_device: &RenderDevice,
render_queue: &RenderQueue,
) {
match self {
GpuPointLights::Uniform(buffer) => buffer.write_buffer(render_device, render_queue),
GpuPointLights::Storage(buffer) => buffer.write_buffer(render_device, render_queue),
}
}
pub fn binding(&self) -> Option<BindingResource> {
match self {
GpuPointLights::Uniform(buffer) => buffer.binding(),
GpuPointLights::Storage(buffer) => buffer.binding(),
}
}
pub fn min_size(buffer_binding_type: BufferBindingType) -> NonZeroU64 {
match buffer_binding_type {
BufferBindingType::Storage { .. } => GpuPointLightsStorage::min_size(),
BufferBindingType::Uniform => GpuPointLightsUniform::min_size(),
}
}
}
impl Default for GpuPointLightsUniform {
fn default() -> Self {
Self {
data: Box::new([GpuPointLight::default(); MAX_UNIFORM_BUFFER_POINT_LIGHTS]),
}
}
}
#[allow(clippy::too_many_arguments)]
// Sort lights by
// - point-light vs spot-light, so that we can iterate point lights and spot lights in contiguous blocks in the fragment shader,
// - then those with shadows enabled first, so that the index can be used to render at most `point_light_shadow_maps_count`
// point light shadows and `spot_light_shadow_maps_count` spot light shadow maps,
// - then by entity as a stable key to ensure that a consistent set of lights are chosen if the light count limit is exceeded.
pub(crate) fn point_light_order(
(entity_1, shadows_enabled_1, is_spot_light_1): (&Entity, &bool, &bool),
(entity_2, shadows_enabled_2, is_spot_light_2): (&Entity, &bool, &bool),
) -> std::cmp::Ordering {
is_spot_light_1
.cmp(is_spot_light_2) // pointlights before spot lights
.then_with(|| shadows_enabled_2.cmp(shadows_enabled_1)) // shadow casters before non-casters
.then_with(|| entity_1.cmp(entity_2)) // stable
}
/// Extracts clusters from the main world from the render world.
pub fn extract_clusters(
mut commands: Commands,
views: Extract<Query<(Entity, &Clusters, &Camera)>>,
) {
for (entity, clusters, camera) in &views {
if !camera.is_active {
continue;
}
let num_entities: usize = clusters.lights.iter().map(|l| l.entities.len()).sum();
let mut data = Vec::with_capacity(clusters.lights.len() + num_entities);
for cluster_lights in &clusters.lights {
data.push(ExtractedClustersPointLightsElement::ClusterHeader(
cluster_lights.point_light_count as u32,
cluster_lights.spot_light_count as u32,
));
for l in &cluster_lights.entities {
data.push(ExtractedClustersPointLightsElement::LightEntity(*l));
}
}
commands.get_or_spawn(entity).insert((
ExtractedClustersPointLights { data },
ExtractedClusterConfig {
near: clusters.near,
far: clusters.far,
dimensions: clusters.dimensions,
},
));
}
}
pub fn prepare_clusters(
mut commands: Commands,
render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>,
mesh_pipeline: Res<MeshPipeline>,
global_light_meta: Res<GlobalLightMeta>,
views: Query<(Entity, &ExtractedClustersPointLights)>,
) {
let render_device = render_device.into_inner();
let supports_storage_buffers = matches!(
mesh_pipeline.clustered_forward_buffer_binding_type,
BufferBindingType::Storage { .. }
);
for (entity, extracted_clusters) in &views {
let mut view_clusters_bindings =
ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type);
view_clusters_bindings.clear();
for record in &extracted_clusters.data {
match record {
ExtractedClustersPointLightsElement::ClusterHeader(
point_light_count,
spot_light_count,
) => {
let offset = view_clusters_bindings.n_indices();
view_clusters_bindings.push_offset_and_counts(
offset,
*point_light_count as usize,
*spot_light_count as usize,
);
}
ExtractedClustersPointLightsElement::LightEntity(entity) => {
if let Some(light_index) = global_light_meta.entity_to_index.get(entity) {
if view_clusters_bindings.n_indices() >= ViewClusterBindings::MAX_INDICES
&& !supports_storage_buffers
{
warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters.");
break;
}
view_clusters_bindings.push_index(*light_index);
}
}
}
}
view_clusters_bindings.write_buffers(render_device, &render_queue);
commands.get_or_spawn(entity).insert(view_clusters_bindings);
}
}
impl ViewClusterBindings {
pub const MAX_OFFSETS: usize = 16384 / 4;
const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4;
pub const MAX_INDICES: usize = 16384;
pub fn new(buffer_binding_type: BufferBindingType) -> Self {
Self {
n_indices: 0,
n_offsets: 0,
buffers: ViewClusterBuffers::new(buffer_binding_type),
}
}
pub fn clear(&mut self) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
*cluster_light_index_lists.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS];
*cluster_offsets_and_counts.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS];
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
cluster_offsets_and_counts,
..
} => {
cluster_light_index_lists.get_mut().data.clear();
cluster_offsets_and_counts.get_mut().data.clear();
}
}
}
pub fn push_offset_and_counts(&mut self, offset: usize, point_count: usize, spot_count: usize) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_offsets_and_counts,
..
} => {
let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4
if array_index >= Self::MAX_UNIFORM_ITEMS {
warn!("cluster offset and count out of bounds!");
return;
}
let component = self.n_offsets & ((1 << 2) - 1);
let packed = pack_offset_and_counts(offset, point_count, spot_count);
cluster_offsets_and_counts.get_mut().data[array_index][component] = packed;
}
ViewClusterBuffers::Storage {
cluster_offsets_and_counts,
..
} => {
cluster_offsets_and_counts.get_mut().data.push(UVec4::new(
offset as u32,
point_count as u32,
spot_count as u32,
0,
));
}
}
self.n_offsets += 1;
}
pub fn n_indices(&self) -> usize {
self.n_indices
}
pub fn push_index(&mut self, index: usize) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
..
} => {
let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16
let component = (self.n_indices >> 2) & ((1 << 2) - 1);
let sub_index = self.n_indices & ((1 << 2) - 1);
let index = index as u32;
cluster_light_index_lists.get_mut().data[array_index][component] |=
index << (8 * sub_index);
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
..
} => {
cluster_light_index_lists.get_mut().data.push(index as u32);
}
}
self.n_indices += 1;
}
pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
cluster_light_index_lists.write_buffer(render_device, render_queue);
cluster_offsets_and_counts.write_buffer(render_device, render_queue);
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
cluster_light_index_lists.write_buffer(render_device, render_queue);
cluster_offsets_and_counts.write_buffer(render_device, render_queue);
}
}
}
pub fn light_index_lists_binding(&self) -> Option<BindingResource> {
match &self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
..
} => cluster_light_index_lists.binding(),
ViewClusterBuffers::Storage {
cluster_light_index_lists,
..
} => cluster_light_index_lists.binding(),
}
}
pub fn offsets_and_counts_binding(&self) -> Option<BindingResource> {
match &self.buffers {
ViewClusterBuffers::Uniform {
cluster_offsets_and_counts,
..
} => cluster_offsets_and_counts.binding(),
ViewClusterBuffers::Storage {
cluster_offsets_and_counts,
..
} => cluster_offsets_and_counts.binding(),
}
}
pub fn min_size_cluster_light_index_lists(
buffer_binding_type: BufferBindingType,
) -> NonZeroU64 {
match buffer_binding_type {
BufferBindingType::Storage { .. } => GpuClusterLightIndexListsStorage::min_size(),
BufferBindingType::Uniform => GpuClusterLightIndexListsUniform::min_size(),
}
}
pub fn min_size_cluster_offsets_and_counts(
buffer_binding_type: BufferBindingType,
) -> NonZeroU64 {
match buffer_binding_type {
BufferBindingType::Storage { .. } => GpuClusterOffsetsAndCountsStorage::min_size(),
BufferBindingType::Uniform => GpuClusterOffsetsAndCountsUniform::min_size(),
}
}
}
impl ViewClusterBuffers {
fn new(buffer_binding_type: BufferBindingType) -> Self {
match buffer_binding_type {
BufferBindingType::Storage { .. } => Self::storage(),
BufferBindingType::Uniform => Self::uniform(),
}
}
fn uniform() -> Self {
ViewClusterBuffers::Uniform {
cluster_light_index_lists: UniformBuffer::default(),
cluster_offsets_and_counts: UniformBuffer::default(),
}
}
fn storage() -> Self {
ViewClusterBuffers::Storage {
cluster_light_index_lists: StorageBuffer::default(),
cluster_offsets_and_counts: StorageBuffer::default(),
}
}
}
// NOTE: With uniform buffer max binding size as 16384 bytes
// that means we can fit 256 point lights in one uniform
// buffer, which means the count can be at most 256 so it
// needs 9 bits.
// The array of indices can also use u8 and that means the
// offset in to the array of indices needs to be able to address
// 16384 values. log2(16384) = 14 bits.
// We use 32 bits to store the offset and counts so
// we pack the offset into the upper 14 bits of a u32,
// the point light count into bits 9-17, and the spot light count into bits 0-8.
// [ 31 .. 18 | 17 .. 9 | 8 .. 0 ]
// [ offset | point light count | spot light count ]
// NOTE: This assumes CPU and GPU endianness are the same which is true
// for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs
fn pack_offset_and_counts(offset: usize, point_count: usize, spot_count: usize) -> u32 {
((offset as u32 & CLUSTER_OFFSET_MASK) << (CLUSTER_COUNT_SIZE * 2))
| (point_count as u32 & CLUSTER_COUNT_MASK) << CLUSTER_COUNT_SIZE
| (spot_count as u32 & CLUSTER_COUNT_MASK)
}
#[derive(ShaderType)]
struct GpuClusterLightIndexListsUniform {
data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
}
// NOTE: Assert at compile time that GpuClusterLightIndexListsUniform
// fits within the maximum uniform buffer binding size
const _: () = assert!(GpuClusterLightIndexListsUniform::SHADER_SIZE.get() <= 16384);
impl Default for GpuClusterLightIndexListsUniform {
fn default() -> Self {
Self {
data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]),
}
}
}
impl Default for GpuClusterOffsetsAndCountsUniform {
fn default() -> Self {
Self {
data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]),
}
}
}

View file

@ -0,0 +1,54 @@
use bevy_math::UVec2;
use crate::{ClusterConfig, Clusters};
fn test_cluster_tiling(config: ClusterConfig, screen_size: UVec2) -> Clusters {
let dims = config.dimensions_for_screen_size(screen_size);
// note: near & far do not affect tiling
let mut clusters = Clusters::default();
clusters.update(screen_size, dims);
// check we cover the screen
assert!(clusters.tile_size.x * clusters.dimensions.x >= screen_size.x);
assert!(clusters.tile_size.y * clusters.dimensions.y >= screen_size.y);
// check a smaller number of clusters would not cover the screen
assert!(clusters.tile_size.x * (clusters.dimensions.x - 1) < screen_size.x);
assert!(clusters.tile_size.y * (clusters.dimensions.y - 1) < screen_size.y);
// check a smaller tile size would not cover the screen
assert!((clusters.tile_size.x - 1) * clusters.dimensions.x < screen_size.x);
assert!((clusters.tile_size.y - 1) * clusters.dimensions.y < screen_size.y);
// check we don't have more clusters than pixels
assert!(clusters.dimensions.x <= screen_size.x);
assert!(clusters.dimensions.y <= screen_size.y);
clusters
}
#[test]
// check tiling for small screen sizes
fn test_default_cluster_setup_small_screensizes() {
for x in 1..100 {
for y in 1..100 {
let screen_size = UVec2::new(x, y);
let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size);
assert!(clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096);
}
}
}
#[test]
// check tiling for long thin screen sizes
fn test_default_cluster_setup_small_x() {
for x in 1..10 {
for y in 1..5000 {
let screen_size = UVec2::new(x, y);
let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size);
assert!(clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096);
let screen_size = UVec2::new(y, x);
let clusters = test_cluster_tiling(ClusterConfig::default(), screen_size);
assert!(clusters.dimensions.x * clusters.dimensions.y * clusters.dimensions.z <= 4096);
}
}
}

View file

@ -22,6 +22,7 @@ pub mod experimental {
}
mod bundle;
mod cluster;
pub mod deferred;
mod extended_material;
mod fog;
@ -41,6 +42,7 @@ use bevy_color::{Color, LinearRgba};
use std::marker::PhantomData;
pub use bundle::*;
pub use cluster::*;
pub use extended_material::*;
pub use fog::*;
pub use light::*;
@ -337,7 +339,7 @@ impl Plugin for PbrPlugin {
PostUpdate,
(
add_clusters.in_set(SimulationLightSystems::AddClusters),
assign_lights_to_clusters
crate::assign_lights_to_clusters
.in_set(SimulationLightSystems::AssignLightsToClusters)
.after(TransformSystem::TransformPropagate)
.after(VisibilitySystems::CheckVisibility)

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,9 @@ use bevy_core_pipeline::core_3d::CORE_3D_DEPTH_FORMAT;
use bevy_ecs::entity::EntityHashSet;
use bevy_ecs::prelude::*;
use bevy_ecs::{entity::EntityHashMap, system::lifetimeless::Read};
use bevy_math::{Mat4, UVec3, UVec4, Vec2, Vec3, Vec3Swizzles, Vec4, Vec4Swizzles};
use bevy_math::{Mat4, UVec4, Vec2, Vec3, Vec3Swizzles, Vec4, Vec4Swizzles};
use bevy_render::mesh::Mesh;
use bevy_render::{
camera::Camera,
diagnostic::RecordDiagnostics,
mesh::GpuMesh,
primitives::{CascadesFrusta, CubemapFrusta, Frustum, HalfSpace},
@ -23,7 +22,7 @@ use bevy_transform::{components::GlobalTransform, prelude::Transform};
#[cfg(feature = "trace")]
use bevy_utils::tracing::info_span;
use bevy_utils::tracing::{error, warn};
use std::{hash::Hash, num::NonZeroU64, ops::Range};
use std::{hash::Hash, ops::Range};
use crate::*;
@ -56,96 +55,6 @@ pub struct ExtractedDirectionalLight {
pub render_layers: RenderLayers,
}
#[derive(Copy, Clone, ShaderType, Default, Debug)]
pub struct GpuPointLight {
// For point lights: the lower-right 2x2 values of the projection matrix [2][2] [2][3] [3][2] [3][3]
// For spot lights: 2 components of the direction (x,z), spot_scale and spot_offset
light_custom_data: Vec4,
color_inverse_square_range: Vec4,
position_radius: Vec4,
flags: u32,
shadow_depth_bias: f32,
shadow_normal_bias: f32,
spot_light_tan_angle: f32,
}
#[derive(ShaderType)]
pub struct GpuPointLightsUniform {
data: Box<[GpuPointLight; MAX_UNIFORM_BUFFER_POINT_LIGHTS]>,
}
impl Default for GpuPointLightsUniform {
fn default() -> Self {
Self {
data: Box::new([GpuPointLight::default(); MAX_UNIFORM_BUFFER_POINT_LIGHTS]),
}
}
}
#[derive(ShaderType, Default)]
pub struct GpuPointLightsStorage {
#[size(runtime)]
data: Vec<GpuPointLight>,
}
pub enum GpuPointLights {
Uniform(UniformBuffer<GpuPointLightsUniform>),
Storage(StorageBuffer<GpuPointLightsStorage>),
}
impl GpuPointLights {
fn new(buffer_binding_type: BufferBindingType) -> Self {
match buffer_binding_type {
BufferBindingType::Storage { .. } => Self::storage(),
BufferBindingType::Uniform => Self::uniform(),
}
}
fn uniform() -> Self {
Self::Uniform(UniformBuffer::default())
}
fn storage() -> Self {
Self::Storage(StorageBuffer::default())
}
fn set(&mut self, mut lights: Vec<GpuPointLight>) {
match self {
GpuPointLights::Uniform(buffer) => {
let len = lights.len().min(MAX_UNIFORM_BUFFER_POINT_LIGHTS);
let src = &lights[..len];
let dst = &mut buffer.get_mut().data[..len];
dst.copy_from_slice(src);
}
GpuPointLights::Storage(buffer) => {
buffer.get_mut().data.clear();
buffer.get_mut().data.append(&mut lights);
}
}
}
fn write_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {
match self {
GpuPointLights::Uniform(buffer) => buffer.write_buffer(render_device, render_queue),
GpuPointLights::Storage(buffer) => buffer.write_buffer(render_device, render_queue),
}
}
pub fn binding(&self) -> Option<BindingResource> {
match self {
GpuPointLights::Uniform(buffer) => buffer.binding(),
GpuPointLights::Storage(buffer) => buffer.binding(),
}
}
pub fn min_size(buffer_binding_type: BufferBindingType) -> NonZeroU64 {
match buffer_binding_type {
BufferBindingType::Storage { .. } => GpuPointLightsStorage::min_size(),
BufferBindingType::Uniform => GpuPointLightsUniform::min_size(),
}
}
}
// NOTE: These must match the bit flags in bevy_pbr/src/render/mesh_view_types.wgsl!
bitflags::bitflags! {
#[repr(transparent)]
@ -204,9 +113,6 @@ pub struct GpuLights {
spot_light_shadowmap_offset: i32,
}
// NOTE: this must be kept in sync with the same constants in pbr.frag
pub const MAX_UNIFORM_BUFFER_POINT_LIGHTS: usize = 256;
//NOTE: When running bevy on Adreno GPU chipsets in WebGL, any value above 1 will result in a crash
// when loading the wgsl "pbr_functions.wgsl" in the function apply_fog.
#[cfg(all(feature = "webgl", target_arch = "wasm32", not(feature = "webgpu")))]
@ -262,57 +168,6 @@ impl FromWorld for ShadowSamplers {
}
}
#[derive(Component)]
pub struct ExtractedClusterConfig {
/// Special near value for cluster calculations
near: f32,
far: f32,
/// Number of clusters in `X` / `Y` / `Z` in the view frustum
dimensions: UVec3,
}
enum ExtractedClustersPointLightsElement {
ClusterHeader(u32, u32),
LightEntity(Entity),
}
#[derive(Component)]
pub struct ExtractedClustersPointLights {
data: Vec<ExtractedClustersPointLightsElement>,
}
pub fn extract_clusters(
mut commands: Commands,
views: Extract<Query<(Entity, &Clusters, &Camera)>>,
) {
for (entity, clusters, camera) in &views {
if !camera.is_active {
continue;
}
let num_entities: usize = clusters.lights.iter().map(|l| l.entities.len()).sum();
let mut data = Vec::with_capacity(clusters.lights.len() + num_entities);
for cluster_lights in &clusters.lights {
data.push(ExtractedClustersPointLightsElement::ClusterHeader(
cluster_lights.point_light_count as u32,
cluster_lights.spot_light_count as u32,
));
for l in &cluster_lights.entities {
data.push(ExtractedClustersPointLightsElement::LightEntity(*l));
}
}
commands.get_or_spawn(entity).insert((
ExtractedClustersPointLights { data },
ExtractedClusterConfig {
near: clusters.near,
far: clusters.far,
dimensions: clusters.dimensions,
},
));
}
}
#[allow(clippy::too_many_arguments)]
pub fn extract_lights(
mut commands: Commands,
@ -584,35 +439,6 @@ pub struct ViewLightsUniformOffset {
pub offset: u32,
}
// NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that
// at least that many are supported using this constant and SupportedBindingType::from_device()
pub const CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT: u32 = 3;
#[derive(Resource)]
pub struct GlobalLightMeta {
pub gpu_point_lights: GpuPointLights,
pub entity_to_index: EntityHashMap<usize>,
}
impl FromWorld for GlobalLightMeta {
fn from_world(world: &mut World) -> Self {
Self::new(
world
.resource::<RenderDevice>()
.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT),
)
}
}
impl GlobalLightMeta {
pub fn new(buffer_binding_type: BufferBindingType) -> Self {
Self {
gpu_point_lights: GpuPointLights::new(buffer_binding_type),
entity_to_index: EntityHashMap::default(),
}
}
}
#[derive(Resource, Default)]
pub struct LightMeta {
pub view_gpu_lights: DynamicUniformBuffer<GpuLights>,
@ -808,7 +634,7 @@ pub fn prepare_lights(
// point light shadows and `spot_light_shadow_maps_count` spot light shadow maps,
// - then by entity as a stable key to ensure that a consistent set of lights are chosen if the light count limit is exceeded.
point_lights.sort_by(|(entity_1, light_1, _), (entity_2, light_2, _)| {
point_light_order(
crate::cluster::point_light_order(
(
entity_1,
&light_1.shadows_enabled,
@ -1327,327 +1153,6 @@ pub fn prepare_lights(
shadow_render_phases.retain(|entity, _| live_shadow_mapping_lights.contains(entity));
}
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl
// and must be large enough to contain MAX_UNIFORM_BUFFER_POINT_LIGHTS
const CLUSTER_COUNT_SIZE: u32 = 9;
const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - (CLUSTER_COUNT_SIZE * 2))) - 1;
const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1;
// NOTE: With uniform buffer max binding size as 16384 bytes
// that means we can fit 256 point lights in one uniform
// buffer, which means the count can be at most 256 so it
// needs 9 bits.
// The array of indices can also use u8 and that means the
// offset in to the array of indices needs to be able to address
// 16384 values. log2(16384) = 14 bits.
// We use 32 bits to store the offset and counts so
// we pack the offset into the upper 14 bits of a u32,
// the point light count into bits 9-17, and the spot light count into bits 0-8.
// [ 31 .. 18 | 17 .. 9 | 8 .. 0 ]
// [ offset | point light count | spot light count ]
// NOTE: This assumes CPU and GPU endianness are the same which is true
// for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs
fn pack_offset_and_counts(offset: usize, point_count: usize, spot_count: usize) -> u32 {
((offset as u32 & CLUSTER_OFFSET_MASK) << (CLUSTER_COUNT_SIZE * 2))
| (point_count as u32 & CLUSTER_COUNT_MASK) << CLUSTER_COUNT_SIZE
| (spot_count as u32 & CLUSTER_COUNT_MASK)
}
#[derive(ShaderType)]
struct GpuClusterLightIndexListsUniform {
data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
}
// NOTE: Assert at compile time that GpuClusterLightIndexListsUniform
// fits within the maximum uniform buffer binding size
const _: () = assert!(GpuClusterLightIndexListsUniform::SHADER_SIZE.get() <= 16384);
impl Default for GpuClusterLightIndexListsUniform {
fn default() -> Self {
Self {
data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]),
}
}
}
#[derive(ShaderType)]
struct GpuClusterOffsetsAndCountsUniform {
data: Box<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>,
}
impl Default for GpuClusterOffsetsAndCountsUniform {
fn default() -> Self {
Self {
data: Box::new([UVec4::ZERO; ViewClusterBindings::MAX_UNIFORM_ITEMS]),
}
}
}
#[derive(ShaderType, Default)]
struct GpuClusterLightIndexListsStorage {
#[size(runtime)]
data: Vec<u32>,
}
#[derive(ShaderType, Default)]
struct GpuClusterOffsetsAndCountsStorage {
#[size(runtime)]
data: Vec<UVec4>,
}
enum ViewClusterBuffers {
Uniform {
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
cluster_light_index_lists: UniformBuffer<GpuClusterLightIndexListsUniform>,
// NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment
cluster_offsets_and_counts: UniformBuffer<GpuClusterOffsetsAndCountsUniform>,
},
Storage {
cluster_light_index_lists: StorageBuffer<GpuClusterLightIndexListsStorage>,
cluster_offsets_and_counts: StorageBuffer<GpuClusterOffsetsAndCountsStorage>,
},
}
impl ViewClusterBuffers {
fn new(buffer_binding_type: BufferBindingType) -> Self {
match buffer_binding_type {
BufferBindingType::Storage { .. } => Self::storage(),
BufferBindingType::Uniform => Self::uniform(),
}
}
fn uniform() -> Self {
ViewClusterBuffers::Uniform {
cluster_light_index_lists: UniformBuffer::default(),
cluster_offsets_and_counts: UniformBuffer::default(),
}
}
fn storage() -> Self {
ViewClusterBuffers::Storage {
cluster_light_index_lists: StorageBuffer::default(),
cluster_offsets_and_counts: StorageBuffer::default(),
}
}
}
#[derive(Component)]
pub struct ViewClusterBindings {
n_indices: usize,
n_offsets: usize,
buffers: ViewClusterBuffers,
}
impl ViewClusterBindings {
pub const MAX_OFFSETS: usize = 16384 / 4;
const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4;
pub const MAX_INDICES: usize = 16384;
pub fn new(buffer_binding_type: BufferBindingType) -> Self {
Self {
n_indices: 0,
n_offsets: 0,
buffers: ViewClusterBuffers::new(buffer_binding_type),
}
}
pub fn clear(&mut self) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
*cluster_light_index_lists.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS];
*cluster_offsets_and_counts.get_mut().data = [UVec4::ZERO; Self::MAX_UNIFORM_ITEMS];
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
cluster_offsets_and_counts,
..
} => {
cluster_light_index_lists.get_mut().data.clear();
cluster_offsets_and_counts.get_mut().data.clear();
}
}
}
pub fn push_offset_and_counts(&mut self, offset: usize, point_count: usize, spot_count: usize) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_offsets_and_counts,
..
} => {
let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4
if array_index >= Self::MAX_UNIFORM_ITEMS {
warn!("cluster offset and count out of bounds!");
return;
}
let component = self.n_offsets & ((1 << 2) - 1);
let packed = pack_offset_and_counts(offset, point_count, spot_count);
cluster_offsets_and_counts.get_mut().data[array_index][component] = packed;
}
ViewClusterBuffers::Storage {
cluster_offsets_and_counts,
..
} => {
cluster_offsets_and_counts.get_mut().data.push(UVec4::new(
offset as u32,
point_count as u32,
spot_count as u32,
0,
));
}
}
self.n_offsets += 1;
}
pub fn n_indices(&self) -> usize {
self.n_indices
}
pub fn push_index(&mut self, index: usize) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
..
} => {
let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16
let component = (self.n_indices >> 2) & ((1 << 2) - 1);
let sub_index = self.n_indices & ((1 << 2) - 1);
let index = index as u32;
cluster_light_index_lists.get_mut().data[array_index][component] |=
index << (8 * sub_index);
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
..
} => {
cluster_light_index_lists.get_mut().data.push(index as u32);
}
}
self.n_indices += 1;
}
pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {
match &mut self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
cluster_light_index_lists.write_buffer(render_device, render_queue);
cluster_offsets_and_counts.write_buffer(render_device, render_queue);
}
ViewClusterBuffers::Storage {
cluster_light_index_lists,
cluster_offsets_and_counts,
} => {
cluster_light_index_lists.write_buffer(render_device, render_queue);
cluster_offsets_and_counts.write_buffer(render_device, render_queue);
}
}
}
pub fn light_index_lists_binding(&self) -> Option<BindingResource> {
match &self.buffers {
ViewClusterBuffers::Uniform {
cluster_light_index_lists,
..
} => cluster_light_index_lists.binding(),
ViewClusterBuffers::Storage {
cluster_light_index_lists,
..
} => cluster_light_index_lists.binding(),
}
}
pub fn offsets_and_counts_binding(&self) -> Option<BindingResource> {
match &self.buffers {
ViewClusterBuffers::Uniform {
cluster_offsets_and_counts,
..
} => cluster_offsets_and_counts.binding(),
ViewClusterBuffers::Storage {
cluster_offsets_and_counts,
..
} => cluster_offsets_and_counts.binding(),
}
}
pub fn min_size_cluster_light_index_lists(
buffer_binding_type: BufferBindingType,
) -> NonZeroU64 {
match buffer_binding_type {
BufferBindingType::Storage { .. } => GpuClusterLightIndexListsStorage::min_size(),
BufferBindingType::Uniform => GpuClusterLightIndexListsUniform::min_size(),
}
}
pub fn min_size_cluster_offsets_and_counts(
buffer_binding_type: BufferBindingType,
) -> NonZeroU64 {
match buffer_binding_type {
BufferBindingType::Storage { .. } => GpuClusterOffsetsAndCountsStorage::min_size(),
BufferBindingType::Uniform => GpuClusterOffsetsAndCountsUniform::min_size(),
}
}
}
pub fn prepare_clusters(
mut commands: Commands,
render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>,
mesh_pipeline: Res<MeshPipeline>,
global_light_meta: Res<GlobalLightMeta>,
views: Query<(Entity, &ExtractedClustersPointLights)>,
) {
let render_device = render_device.into_inner();
let supports_storage_buffers = matches!(
mesh_pipeline.clustered_forward_buffer_binding_type,
BufferBindingType::Storage { .. }
);
for (entity, extracted_clusters) in &views {
let mut view_clusters_bindings =
ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type);
view_clusters_bindings.clear();
for record in &extracted_clusters.data {
match record {
ExtractedClustersPointLightsElement::ClusterHeader(
point_light_count,
spot_light_count,
) => {
let offset = view_clusters_bindings.n_indices();
view_clusters_bindings.push_offset_and_counts(
offset,
*point_light_count as usize,
*spot_light_count as usize,
);
}
ExtractedClustersPointLightsElement::LightEntity(entity) => {
if let Some(light_index) = global_light_meta.entity_to_index.get(entity) {
if view_clusters_bindings.n_indices() >= ViewClusterBindings::MAX_INDICES
&& !supports_storage_buffers
{
warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters.");
break;
}
view_clusters_bindings.push_index(*light_index);
}
}
}
}
view_clusters_bindings.write_buffers(render_device, &render_queue);
commands.get_or_spawn(entity).insert(view_clusters_bindings);
}
}
/// For each shadow cascade, iterates over all the meshes "visible" from it and
/// adds them to [`BinnedRenderPhase`]s or [`SortedRenderPhase`]s as
/// appropriate.