Meshlet fix software rasterization (#16049)

# Objective
1. Prevent weird glitches with stray pixels scattered around the scene

![image](https://github.com/user-attachments/assets/f12adb38-5996-4dc7-bea6-bd326b7317e1)
2. Prevent weird glitchy full-screen triangles that pop up and destroy
perf (SW rasterizing huge triangles is slow)

![image](https://github.com/user-attachments/assets/d3705427-13a5-47bc-a54b-756f0409da0b)

## Solution
1. Use floating-point math in the SW rasterizer bounding box calculation
to handle vertices with negative coordinates, and add backface culling
2. Force hardware raster for clusters that clip the near plane, and let
the hardware rasterizer handle the clipping

I also adjusted the SW rasterizer threshold to < 64 pixels (slightly
better perf in my test scene, though a more comprehensive test is still
needed), and enabled backface culling for the hardware raster pipeline.

## Testing

- Did you test these changes? If so, how?
  - Yes, on an example scene. Issues no longer occur.
- Are there any parts that need more testing?
  - No.
- How can other people (reviewers) test your changes? Is there anything
specific they need to know?
  - Run the meshlet example.
This commit is contained in:
JMS55 2024-10-22 16:05:40 -07:00 committed by GitHub
parent fe4f44bb43
commit 2223f6ec3a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 42 additions and 39 deletions

View file

@ -126,11 +126,13 @@ fn cull_clusters(
aabb_width_pixels = (aabb.z - aabb.x) * view.viewport.z;
aabb_height_pixels = (aabb.w - aabb.y) * view.viewport.w;
#endif
let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(32.0)); // TODO: Nanite does something different. Come up with my own heuristic.
let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(64.0));
// Let the hardware rasterizer handle near-plane clipping
let not_intersects_near_plane = dot(view.frustum[4u], culling_bounding_sphere_center) > culling_bounding_sphere_radius;
// TODO: Also check if needs depth clipping
var buffer_slot: u32;
if cluster_is_small {
if cluster_is_small && not_intersects_near_plane {
// Append this cluster to the list for software rasterization
buffer_slot = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u);
} else {

View file

@ -249,7 +249,7 @@ impl FromWorld for MeshletPipelines {
topology: PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: FrontFace::Ccw,
cull_mode: None,
cull_mode: Some(Face::Back),
unclipped_depth: false,
polygon_mode: PolygonMode::Fill,
conservative: false,
@ -292,7 +292,7 @@ impl FromWorld for MeshletPipelines {
topology: PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: FrontFace::Ccw,
cull_mode: None,
cull_mode: Some(Face::Back),
unclipped_depth: false,
polygon_mode: PolygonMode::Fill,
conservative: false,
@ -336,7 +336,7 @@ impl FromWorld for MeshletPipelines {
topology: PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: FrontFace::Ccw,
cull_mode: None,
cull_mode: Some(Face::Back),
unclipped_depth: false,
polygon_mode: PolygonMode::Fill,
conservative: false,

View file

@ -20,7 +20,7 @@
/// Compute shader for rasterizing small clusters into a visibility buffer.
// TODO: Subpixel precision and top-left rule
// TODO: Fixed-point math and top-left rule
var<workgroup> viewport_vertices: array<vec3f, 255>;
@ -79,98 +79,99 @@ fn rasterize_cluster(
let vertex_2 = viewport_vertices[vertex_ids[0]];
let packed_ids = (cluster_id << 7u) | triangle_id;
// Compute triangle bounding box
let min_x = u32(min3(vertex_0.x, vertex_1.x, vertex_2.x));
let min_y = u32(min3(vertex_0.y, vertex_1.y, vertex_2.y));
var max_x = u32(ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x)));
var max_y = u32(ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y)));
max_x = min(max_x, u32(view.viewport.z) - 1u);
max_y = min(max_y, u32(view.viewport.w) - 1u);
if any(vec2(min_x, min_y) > vec2(max_x, max_y)) { return; }
// Backface culling
let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy);
if triangle_double_area <= 0.0 { return; }
// Setup triangle gradients
let w_x = vec3(vertex_1.y - vertex_2.y, vertex_2.y - vertex_0.y, vertex_0.y - vertex_1.y);
let w_y = vec3(vertex_2.x - vertex_1.x, vertex_0.x - vertex_2.x, vertex_1.x - vertex_0.x);
let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy); // TODO: Reuse earlier calculations and take advantage of summing to 1
let vertices_z = vec3(vertex_0.z, vertex_1.z, vertex_2.z) / triangle_double_area;
let z_x = dot(vertices_z, w_x);
let z_y = dot(vertices_z, w_y);
// Compute triangle bounding box
var min_x = floor(min3(vertex_0.x, vertex_1.x, vertex_2.x));
var min_y = floor(min3(vertex_0.y, vertex_1.y, vertex_2.y));
var max_x = ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x));
var max_y = ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y));
min_x = max(min_x, 0.0);
min_y = max(min_y, 0.0);
max_x = min(max_x, view.viewport.z - 1.0);
max_y = min(max_y, view.viewport.w - 1.0);
// Setup initial triangle equations
let starting_pixel = vec2(f32(min_x), f32(min_y)) + 0.5;
let starting_pixel = vec2(min_x, min_y) + 0.5;
var w_row = vec3(
// TODO: Reuse earlier calculations and take advantage of summing to 1
edge_function(vertex_1.xy, vertex_2.xy, starting_pixel),
edge_function(vertex_2.xy, vertex_0.xy, starting_pixel),
edge_function(vertex_0.xy, vertex_1.xy, starting_pixel),
);
var z_row = dot(vertices_z, w_row);
let view_width = u32(view.viewport.z);
var frag_coord_1d_row = min_y * view_width;
// Rasterize triangle
if subgroupAny(max_x - min_x > 4u) {
if subgroupAny(max_x - min_x > 4.0) {
// Scanline setup
let edge_012 = -w_x;
let open_edge = edge_012 < vec3(0.0);
let inverse_edge_012 = select(1.0 / edge_012, vec3(1e8), edge_012 == vec3(0.0));
let max_x_diff = vec3<f32>(max_x - min_x);
for (var y = min_y; y <= max_y; y++) {
let max_x_diff = vec3(max_x - min_x);
for (var y = min_y; y <= max_y; y += 1.0) {
// Calculate start and end X interval for pixels in this row within the triangle
let cross_x = w_row * inverse_edge_012;
let min_x2 = select(vec3(0.0), cross_x, open_edge);
let max_x2 = select(cross_x, max_x_diff, open_edge);
var x0 = u32(ceil(max3(min_x2[0], min_x2[1], min_x2[2])));
var x1 = u32(min3(max_x2[0], max_x2[1], max_x2[2]));
var x0 = ceil(max3(min_x2[0], min_x2[1], min_x2[2]));
var x1 = min3(max_x2[0], max_x2[1], max_x2[2]);
var w = w_row + w_x * f32(x0);
var z = z_row + z_x * f32(x0);
var w = w_row + w_x * x0;
var z = z_row + z_x * x0;
x0 += min_x;
x1 += min_x;
// Iterate scanline X interval
for (var x = x0; x <= x1; x++) {
for (var x = x0; x <= x1; x += 1.0) {
// Check if point at pixel is within triangle (TODO: this shouldn't be needed, but there's bugs without it)
if min3(w[0], w[1], w[2]) >= 0.0 {
write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids);
write_visibility_buffer_pixel(x, y, z, packed_ids);
}
// Increment edge functions along the X-axis
// Increment triangle equations along the X-axis
w += w_x;
z += z_x;
}
// Increment edge functions along the Y-axis
// Increment triangle equations along the Y-axis
w_row += w_y;
z_row += z_y;
frag_coord_1d_row += view_width;
}
} else {
// Iterate over every pixel in the triangle's bounding box
for (var y = min_y; y <= max_y; y++) {
for (var y = min_y; y <= max_y; y += 1.0) {
var w = w_row;
var z = z_row;
for (var x = min_x; x <= max_x; x++) {
for (var x = min_x; x <= max_x; x += 1.0) {
// Check if point at pixel is within triangle
if min3(w[0], w[1], w[2]) >= 0.0 {
write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids);
write_visibility_buffer_pixel(x, y, z, packed_ids);
}
// Increment edge functions along the X-axis
// Increment triangle equations along the X-axis
w += w_x;
z += z_x;
}
// Increment edge functions along the Y-axis
// Increment triangle equations along the Y-axis
w_row += w_y;
z_row += z_y;
frag_coord_1d_row += view_width;
}
}
}
fn write_visibility_buffer_pixel(frag_coord_1d: u32, z: f32, packed_ids: u32) {
fn write_visibility_buffer_pixel(x: f32, y: f32, z: f32, packed_ids: u32) {
let frag_coord_1d = u32(y * view.viewport.z + x);
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
let depth = bitcast<u32>(z);
let visibility = (u64(depth) << 32u) | u64(packed_ids);