Improved text batching (#14848)

# Objective

The UI text rendering is really slow because it extracts each glyph as a
separate ui node even though all the glyphs in a text section have the
same texture, color and clipping rects.

## Solution

Store the glyphs in a separate contiguous array, queue one transparent
ui item per text section which has indices into the glyph array.

## Testing

`cargo run --example many_glyphs --release`

Runs at about 22fps on main and 95fps with this PR on my computer.

I'll do some proper comparisons once I work out why tracy 11 is refusing to run.

---------

Co-authored-by: Kristoffer Søholm <k.soeholm@gmail.com>
This commit is contained in:
ickshonpe 2024-10-08 23:24:27 +01:00 committed by GitHub
parent 2d1b4939d2
commit 675f8ad403
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -116,7 +116,7 @@ pub fn build_ui_render(app: &mut App) {
extract_uinode_images.in_set(RenderUiSystem::ExtractImages),
extract_uinode_borders.in_set(RenderUiSystem::ExtractBorders),
#[cfg(feature = "bevy_text")]
extract_uinode_text.in_set(RenderUiSystem::ExtractText),
extract_text_sections.in_set(RenderUiSystem::ExtractText),
),
)
.add_systems(
@ -160,6 +160,19 @@ fn get_ui_graph(render_app: &mut SubApp) -> RenderGraph {
ui_graph
}
/// A UI node extracted from the main world for rendering.
pub struct ExtractedUiNode {
    /// Stacking-order index of the node.
    pub stack_index: u32,
    /// Color of the node (for glyph runs, the text section's color).
    pub color: LinearRgba,
    /// Rectangle of the node.
    pub rect: Rect,
    /// Texture asset id; `AssetId::default()` marks an untextured node.
    pub image: AssetId<Image>,
    /// Optional clipping rectangle applied when preparing vertices.
    pub clip: Option<Rect>,
    // Camera to render this UI node to. By the time it is extracted,
    // it is defaulted to a single camera if only one exists.
    // Nodes with ambiguous camera will be ignored.
    pub camera_entity: Entity,
    /// Per-kind payload: a single quad node or a run of text glyphs.
    pub item: ExtractedUiItem,
}
/// The type of UI node.
/// This is used to determine how to render the UI node.
#[derive(Clone, Copy, Debug, PartialEq)]
@ -168,30 +181,44 @@ pub enum NodeType {
Border,
}
pub struct ExtractedUiNode {
pub stack_index: u32,
/// Per-item payload of an [`ExtractedUiNode`]: either a single quad node
/// or a contiguous run of text glyphs sharing texture, color and clip.
pub enum ExtractedUiItem {
    Node {
        /// Scaling applied when sampling from a texture atlas, if any.
        atlas_scaling: Option<Vec2>,
        /// Mirror the texture horizontally (swaps the UV rect's x extents).
        flip_x: bool,
        /// Mirror the texture vertically (swaps the UV rect's y extents).
        flip_y: bool,
        /// Border radius of the UI node.
        /// Ordering: top left, top right, bottom right, bottom left.
        border_radius: ResolvedBorderRadius,
        /// Border thickness of the UI node.
        /// Ordering: left, top, right, bottom.
        border: BorderRect,
        /// Whether this renders as a filled rect or a border.
        node_type: NodeType,
        /// World transform used to place the quad's corners.
        transform: Mat4,
    },
    /// A contiguous sequence of text glyphs from the same section
    Glyphs {
        /// Scaling applied when sampling the glyph atlas texture.
        atlas_scaling: Vec2,
        /// Indices into [`ExtractedUiNodes::glyphs`]
        range: Range<usize>,
    },
}
/// A single text glyph extracted for rendering, stored in
/// [`ExtractedUiNodes::glyphs`] and referenced by index range from
/// [`ExtractedUiItem::Glyphs`].
pub struct ExtractedGlyph {
    /// World transform of the glyph quad.
    pub transform: Mat4,
    // NOTE(review): the construction site in `extract_text_sections` fills only
    // `transform` and `rect`; the fields below look like leftovers from the old
    // per-glyph `ExtractedUiNode` carried over in this view — confirm whether
    // they are still part of the struct.
    pub color: LinearRgba,
    /// Glyph rectangle within the atlas texture.
    pub rect: Rect,
    pub image: AssetId<Image>,
    pub atlas_scaling: Option<Vec2>,
    pub clip: Option<Rect>,
    pub flip_x: bool,
    pub flip_y: bool,
    // Camera to render this UI node to. By the time it is extracted,
    // it is defaulted to a single camera if only one exists.
    // Nodes with ambiguous camera will be ignored.
    pub camera_entity: Entity,
    /// Border radius of the UI node.
    pub border_radius: ResolvedBorderRadius,
    /// Border thickness of the UI node.
    pub border: BorderRect,
    pub node_type: NodeType,
}
/// Render-world resource collecting all UI nodes extracted this frame,
/// plus a flat buffer of text glyphs shared between them.
#[derive(Resource, Default)]
pub struct ExtractedUiNodes {
    /// Extracted nodes keyed by their (temporary) render entity.
    pub uinodes: EntityHashMap<ExtractedUiNode>,
    /// Glyphs referenced by index range from [`ExtractedUiItem::Glyphs`].
    pub glyphs: Vec<ExtractedGlyph>,
}
impl ExtractedUiNodes {
    /// Empties both the shared glyph buffer and the node map so the resource
    /// can be refilled next frame; underlying allocations are retained.
    pub fn clear(&mut self) {
        self.glyphs.clear();
        self.uinodes.clear();
    }
}
#[allow(clippy::too_many_arguments)]
@ -217,7 +244,7 @@ pub fn extract_uinode_background_colors(
continue;
};
let Ok(&camera_entity) = mapping.get(camera_entity) else {
let Ok(&render_camera_entity) = mapping.get(camera_entity) else {
continue;
};
@ -230,7 +257,6 @@ pub fn extract_uinode_background_colors(
commands.spawn(TemporaryRenderEntity).id(),
ExtractedUiNode {
stack_index: uinode.stack_index,
transform: transform.compute_matrix(),
color: background_color.0.into(),
rect: Rect {
min: Vec2::ZERO,
@ -238,13 +264,16 @@ pub fn extract_uinode_background_colors(
},
clip: clip.map(|clip| clip.clip),
image: AssetId::default(),
atlas_scaling: None,
flip_x: false,
flip_y: false,
camera_entity: camera_entity.id(),
border: uinode.border(),
border_radius: uinode.border_radius(),
node_type: NodeType::Rect,
camera_entity: render_camera_entity.id(),
item: ExtractedUiItem::Node {
atlas_scaling: None,
transform: transform.compute_matrix(),
flip_x: false,
flip_y: false,
border: uinode.border(),
border_radius: uinode.border_radius(),
node_type: NodeType::Rect,
},
},
);
}
@ -321,18 +350,20 @@ pub fn extract_uinode_images(
commands.spawn(TemporaryRenderEntity).id(),
ExtractedUiNode {
stack_index: uinode.stack_index,
transform: transform.compute_matrix(),
color: image.color.into(),
rect,
clip: clip.map(|clip| clip.clip),
image: image.texture.id(),
atlas_scaling,
flip_x: image.flip_x,
flip_y: image.flip_y,
camera_entity: render_camera_entity.id(),
border: uinode.border,
border_radius: uinode.border_radius,
node_type: NodeType::Rect,
item: ExtractedUiItem::Node {
atlas_scaling,
transform: transform.compute_matrix(),
flip_x: image.flip_x,
flip_y: image.flip_y,
border: uinode.border,
border_radius: uinode.border_radius,
node_type: NodeType::Rect,
},
},
);
}
@ -372,7 +403,7 @@ pub fn extract_uinode_borders(
continue;
};
let Ok(&camera_entity) = mapping.get(camera_entity) else {
let Ok(&render_camera_entity) = mapping.get(camera_entity) else {
continue;
};
@ -391,21 +422,23 @@ pub fn extract_uinode_borders(
commands.spawn(TemporaryRenderEntity).id(),
ExtractedUiNode {
stack_index: uinode.stack_index,
transform: global_transform.compute_matrix(),
color: border_color.0.into(),
rect: Rect {
max: uinode.size(),
..Default::default()
},
image,
atlas_scaling: None,
clip: maybe_clip.map(|clip| clip.clip),
flip_x: false,
flip_y: false,
camera_entity: camera_entity.id(),
border_radius: uinode.border_radius(),
border: uinode.border(),
node_type: NodeType::Border,
camera_entity: render_camera_entity.id(),
item: ExtractedUiItem::Node {
atlas_scaling: None,
transform: global_transform.compute_matrix(),
flip_x: false,
flip_y: false,
border: uinode.border(),
border_radius: uinode.border_radius(),
node_type: NodeType::Border,
},
},
);
}
@ -417,21 +450,23 @@ pub fn extract_uinode_borders(
commands.spawn(TemporaryRenderEntity).id(),
ExtractedUiNode {
stack_index: uinode.stack_index,
transform: global_transform.compute_matrix(),
color: outline.color.into(),
rect: Rect {
max: outline_size,
..Default::default()
},
image,
atlas_scaling: None,
clip: maybe_clip.map(|clip| clip.clip),
flip_x: false,
flip_y: false,
camera_entity: camera_entity.id(),
border: BorderRect::square(uinode.outline_width()),
border_radius: uinode.outline_radius(),
node_type: NodeType::Border,
camera_entity: render_camera_entity.id(),
item: ExtractedUiItem::Node {
transform: global_transform.compute_matrix(),
atlas_scaling: None,
flip_x: false,
flip_y: false,
border: BorderRect::square(uinode.outline_width()),
border_radius: uinode.outline_radius(),
node_type: NodeType::Border,
},
},
);
}
@ -544,7 +579,7 @@ pub fn extract_default_ui_camera_view(
#[cfg(feature = "bevy_text")]
#[allow(clippy::too_many_arguments)]
pub fn extract_uinode_text(
pub fn extract_text_sections(
mut commands: Commands,
mut extracted_uinodes: ResMut<ExtractedUiNodes>,
camera_query: Extract<Query<&Camera>>,
@ -564,6 +599,9 @@ pub fn extract_uinode_text(
>,
mapping: Extract<Query<&RenderEntity>>,
) {
let mut start = 0;
let mut end = 1;
let default_ui_camera = default_ui_camera.get();
for (uinode, global_transform, view_visibility, clip, camera, text, text_layout_info) in
&uinode_query
@ -585,7 +623,7 @@ pub fn extract_uinode_text(
* ui_scale.0;
let inverse_scale_factor = scale_factor.recip();
let Ok(&camera_entity) = mapping.get(camera_entity) else {
let Ok(&render_camera_entity) = mapping.get(camera_entity) else {
continue;
};
// Align the text to the nearest physical pixel:
@ -604,44 +642,58 @@ pub fn extract_uinode_text(
transform.translation = transform.translation.round();
transform.translation *= inverse_scale_factor;
let mut color = LinearRgba::WHITE;
let mut current_section = usize::MAX;
for PositionedGlyph {
position,
atlas_info,
section_index,
..
} in &text_layout_info.glyphs
for (
i,
PositionedGlyph {
position,
atlas_info,
section_index,
..
},
) in text_layout_info.glyphs.iter().enumerate()
{
if *section_index != current_section {
color = LinearRgba::from(text.sections[*section_index].style.color);
current_section = *section_index;
}
let atlas = texture_atlases.get(&atlas_info.texture_atlas).unwrap();
let mut rect = atlas.textures[atlas_info.location.glyph_index].as_rect();
rect.min *= inverse_scale_factor;
rect.max *= inverse_scale_factor;
let id = commands.spawn(TemporaryRenderEntity).id();
extracted_uinodes.uinodes.insert(
id,
ExtractedUiNode {
stack_index: uinode.stack_index,
transform: transform
* Mat4::from_translation(position.extend(0.) * inverse_scale_factor),
color,
rect,
image: atlas_info.texture.id(),
atlas_scaling: Some(Vec2::splat(inverse_scale_factor)),
clip: clip.map(|clip| clip.clip),
flip_x: false,
flip_y: false,
camera_entity: camera_entity.id(),
border: BorderRect::ZERO,
border_radius: ResolvedBorderRadius::ZERO,
node_type: NodeType::Rect,
},
);
extracted_uinodes.glyphs.push(ExtractedGlyph {
transform: transform
* Mat4::from_translation(position.extend(0.) * inverse_scale_factor),
rect,
});
if text_layout_info
.glyphs
.get(i + 1)
.map(|info| {
info.section_index != *section_index
|| info.atlas_info.texture != atlas_info.texture
})
.unwrap_or(true)
{
let id = commands.spawn(TemporaryRenderEntity).id();
extracted_uinodes.uinodes.insert(
id,
ExtractedUiNode {
stack_index: uinode.stack_index,
color: LinearRgba::from(text.sections[*section_index].style.color),
image: atlas_info.texture.id(),
clip: clip.map(|clip| clip.clip),
camera_entity: render_camera_entity.id(),
rect,
item: ExtractedUiItem::Glyphs {
atlas_scaling: Vec2::splat(inverse_scale_factor),
range: start..end,
},
},
);
start = end;
}
end += 1;
}
}
}
@ -870,151 +922,258 @@ pub fn prepare_uinodes(
continue;
}
}
match &extracted_uinode.item {
ExtractedUiItem::Node {
atlas_scaling,
flip_x,
flip_y,
border_radius,
border,
node_type,
transform,
} => {
let mut flags = if extracted_uinode.image != AssetId::default() {
shader_flags::TEXTURED
} else {
shader_flags::UNTEXTURED
};
let mut flags = if extracted_uinode.image != AssetId::default() {
shader_flags::TEXTURED
} else {
shader_flags::UNTEXTURED
};
let mut uinode_rect = extracted_uinode.rect;
let mut uinode_rect = extracted_uinode.rect;
let rect_size = uinode_rect.size().extend(1.0);
let rect_size = uinode_rect.size().extend(1.0);
// Specify the corners of the node
let positions = QUAD_VERTEX_POSITIONS
.map(|pos| (*transform * (pos * rect_size).extend(1.)).xyz());
// Specify the corners of the node
let positions = QUAD_VERTEX_POSITIONS.map(|pos| {
(extracted_uinode.transform * (pos * rect_size).extend(1.)).xyz()
});
// Calculate the effect of clipping
// Note: this won't work with rotation/scaling, but that's much more complex (may need more than 2 quads)
let mut positions_diff = if let Some(clip) = extracted_uinode.clip {
[
Vec2::new(
f32::max(clip.min.x - positions[0].x, 0.),
f32::max(clip.min.y - positions[0].y, 0.),
),
Vec2::new(
f32::min(clip.max.x - positions[1].x, 0.),
f32::max(clip.min.y - positions[1].y, 0.),
),
Vec2::new(
f32::min(clip.max.x - positions[2].x, 0.),
f32::min(clip.max.y - positions[2].y, 0.),
),
Vec2::new(
f32::max(clip.min.x - positions[3].x, 0.),
f32::min(clip.max.y - positions[3].y, 0.),
),
]
} else {
[Vec2::ZERO; 4]
};
// Calculate the effect of clipping
// Note: this won't work with rotation/scaling, but that's much more complex (may need more than 2 quads)
let mut positions_diff = if let Some(clip) = extracted_uinode.clip {
[
Vec2::new(
f32::max(clip.min.x - positions[0].x, 0.),
f32::max(clip.min.y - positions[0].y, 0.),
),
Vec2::new(
f32::min(clip.max.x - positions[1].x, 0.),
f32::max(clip.min.y - positions[1].y, 0.),
),
Vec2::new(
f32::min(clip.max.x - positions[2].x, 0.),
f32::min(clip.max.y - positions[2].y, 0.),
),
Vec2::new(
f32::max(clip.min.x - positions[3].x, 0.),
f32::min(clip.max.y - positions[3].y, 0.),
),
]
} else {
[Vec2::ZERO; 4]
};
let positions_clipped = [
positions[0] + positions_diff[0].extend(0.),
positions[1] + positions_diff[1].extend(0.),
positions[2] + positions_diff[2].extend(0.),
positions[3] + positions_diff[3].extend(0.),
];
let positions_clipped = [
positions[0] + positions_diff[0].extend(0.),
positions[1] + positions_diff[1].extend(0.),
positions[2] + positions_diff[2].extend(0.),
positions[3] + positions_diff[3].extend(0.),
];
let transformed_rect_size = transform.transform_vector3(rect_size);
let transformed_rect_size =
extracted_uinode.transform.transform_vector3(rect_size);
// Don't try to cull nodes that have a rotation
// In a rotation around the Z-axis, this value is 0.0 for an angle of 0.0 or π
// In those two cases, the culling check can proceed normally as corners will be on
// horizontal / vertical lines
// For all other angles, bypass the culling check
// This does not properly handle all rotations on all axes
if transform.x_axis[1] == 0.0 {
// Cull nodes that are completely clipped
if positions_diff[0].x - positions_diff[1].x
>= transformed_rect_size.x
|| positions_diff[1].y - positions_diff[2].y
>= transformed_rect_size.y
{
continue;
}
}
let uvs = if flags == shader_flags::UNTEXTURED {
[Vec2::ZERO, Vec2::X, Vec2::ONE, Vec2::Y]
} else {
let image = gpu_images.get(extracted_uinode.image).expect(
"Image was checked during batching and should still exist",
);
// Rescale atlases. This is done here because we need texture data that might not be available in Extract.
let atlas_extent = atlas_scaling
.map(|scaling| image.size.as_vec2() * scaling)
.unwrap_or(uinode_rect.max);
if *flip_x {
core::mem::swap(&mut uinode_rect.max.x, &mut uinode_rect.min.x);
positions_diff[0].x *= -1.;
positions_diff[1].x *= -1.;
positions_diff[2].x *= -1.;
positions_diff[3].x *= -1.;
}
if *flip_y {
core::mem::swap(&mut uinode_rect.max.y, &mut uinode_rect.min.y);
positions_diff[0].y *= -1.;
positions_diff[1].y *= -1.;
positions_diff[2].y *= -1.;
positions_diff[3].y *= -1.;
}
[
Vec2::new(
uinode_rect.min.x + positions_diff[0].x,
uinode_rect.min.y + positions_diff[0].y,
),
Vec2::new(
uinode_rect.max.x + positions_diff[1].x,
uinode_rect.min.y + positions_diff[1].y,
),
Vec2::new(
uinode_rect.max.x + positions_diff[2].x,
uinode_rect.max.y + positions_diff[2].y,
),
Vec2::new(
uinode_rect.min.x + positions_diff[3].x,
uinode_rect.max.y + positions_diff[3].y,
),
]
.map(|pos| pos / atlas_extent)
};
// Don't try to cull nodes that have a rotation
// In a rotation around the Z-axis, this value is 0.0 for an angle of 0.0 or π
// In those two cases, the culling check can proceed normally as corners will be on
// horizontal / vertical lines
// For all other angles, bypass the culling check
// This does not properly handle all rotations on all axes
if extracted_uinode.transform.x_axis[1] == 0.0 {
// Cull nodes that are completely clipped
if positions_diff[0].x - positions_diff[1].x >= transformed_rect_size.x
|| positions_diff[1].y - positions_diff[2].y >= transformed_rect_size.y
{
continue;
let color = extracted_uinode.color.to_f32_array();
if *node_type == NodeType::Border {
flags |= shader_flags::BORDER;
}
for i in 0..4 {
ui_meta.vertices.push(UiVertex {
position: positions_clipped[i].into(),
uv: uvs[i].into(),
color,
flags: flags | shader_flags::CORNERS[i],
radius: [
border_radius.top_left,
border_radius.top_right,
border_radius.bottom_right,
border_radius.bottom_left,
],
border: [border.left, border.top, border.right, border.bottom],
size: rect_size.xy().into(),
});
}
for &i in &QUAD_INDICES {
ui_meta.indices.push(indices_index + i as u32);
}
vertices_index += 6;
indices_index += 4;
}
ExtractedUiItem::Glyphs {
atlas_scaling,
range,
} => {
let image = gpu_images
.get(extracted_uinode.image)
.expect("Image was checked during batching and should still exist");
let atlas_extent = image.size.as_vec2() * *atlas_scaling;
let color = extracted_uinode.color.to_f32_array();
for glyph in &extracted_uinodes.glyphs[range.clone()] {
let glyph_rect = glyph.rect;
let size = glyph.rect.size();
let rect_size = glyph_rect.size().extend(1.0);
// Specify the corners of the glyph
let positions = QUAD_VERTEX_POSITIONS.map(|pos| {
(glyph.transform * (pos * rect_size).extend(1.)).xyz()
});
let positions_diff = if let Some(clip) = extracted_uinode.clip {
[
Vec2::new(
f32::max(clip.min.x - positions[0].x, 0.),
f32::max(clip.min.y - positions[0].y, 0.),
),
Vec2::new(
f32::min(clip.max.x - positions[1].x, 0.),
f32::max(clip.min.y - positions[1].y, 0.),
),
Vec2::new(
f32::min(clip.max.x - positions[2].x, 0.),
f32::min(clip.max.y - positions[2].y, 0.),
),
Vec2::new(
f32::max(clip.min.x - positions[3].x, 0.),
f32::min(clip.max.y - positions[3].y, 0.),
),
]
} else {
[Vec2::ZERO; 4]
};
let positions_clipped = [
positions[0] + positions_diff[0].extend(0.),
positions[1] + positions_diff[1].extend(0.),
positions[2] + positions_diff[2].extend(0.),
positions[3] + positions_diff[3].extend(0.),
];
// cull nodes that are completely clipped
let transformed_rect_size =
glyph.transform.transform_vector3(rect_size);
if positions_diff[0].x - positions_diff[1].x
>= transformed_rect_size.x
|| positions_diff[1].y - positions_diff[2].y
>= transformed_rect_size.y
{
continue;
}
let uvs = [
Vec2::new(
glyph.rect.min.x + positions_diff[0].x,
glyph.rect.min.y + positions_diff[0].y,
),
Vec2::new(
glyph.rect.max.x + positions_diff[1].x,
glyph.rect.min.y + positions_diff[1].y,
),
Vec2::new(
glyph.rect.max.x + positions_diff[2].x,
glyph.rect.max.y + positions_diff[2].y,
),
Vec2::new(
glyph.rect.min.x + positions_diff[3].x,
glyph.rect.max.y + positions_diff[3].y,
),
]
.map(|pos| pos / atlas_extent);
for i in 0..4 {
ui_meta.vertices.push(UiVertex {
position: positions_clipped[i].into(),
uv: uvs[i].into(),
color,
flags: shader_flags::TEXTURED,
radius: [0.0; 4],
border: [0.0; 4],
size: size.into(),
});
}
for &i in &QUAD_INDICES {
ui_meta.indices.push(indices_index + i as u32);
}
vertices_index += 6;
indices_index += 4;
}
}
}
let uvs = if flags == shader_flags::UNTEXTURED {
[Vec2::ZERO, Vec2::X, Vec2::ONE, Vec2::Y]
} else {
let image = gpu_images
.get(extracted_uinode.image)
.expect("Image was checked during batching and should still exist");
// Rescale atlases. This is done here because we need texture data that might not be available in Extract.
let atlas_extent = extracted_uinode
.atlas_scaling
.map(|scaling| image.size.as_vec2() * scaling)
.unwrap_or(uinode_rect.max);
if extracted_uinode.flip_x {
core::mem::swap(&mut uinode_rect.max.x, &mut uinode_rect.min.x);
positions_diff[0].x *= -1.;
positions_diff[1].x *= -1.;
positions_diff[2].x *= -1.;
positions_diff[3].x *= -1.;
}
if extracted_uinode.flip_y {
core::mem::swap(&mut uinode_rect.max.y, &mut uinode_rect.min.y);
positions_diff[0].y *= -1.;
positions_diff[1].y *= -1.;
positions_diff[2].y *= -1.;
positions_diff[3].y *= -1.;
}
[
Vec2::new(
uinode_rect.min.x + positions_diff[0].x,
uinode_rect.min.y + positions_diff[0].y,
),
Vec2::new(
uinode_rect.max.x + positions_diff[1].x,
uinode_rect.min.y + positions_diff[1].y,
),
Vec2::new(
uinode_rect.max.x + positions_diff[2].x,
uinode_rect.max.y + positions_diff[2].y,
),
Vec2::new(
uinode_rect.min.x + positions_diff[3].x,
uinode_rect.max.y + positions_diff[3].y,
),
]
.map(|pos| pos / atlas_extent)
};
let color = extracted_uinode.color.to_f32_array();
if extracted_uinode.node_type == NodeType::Border {
flags |= shader_flags::BORDER;
}
for i in 0..4 {
ui_meta.vertices.push(UiVertex {
position: positions_clipped[i].into(),
uv: uvs[i].into(),
color,
flags: flags | shader_flags::CORNERS[i],
radius: [
extracted_uinode.border_radius.top_left,
extracted_uinode.border_radius.top_right,
extracted_uinode.border_radius.bottom_right,
extracted_uinode.border_radius.bottom_left,
],
border: [
extracted_uinode.border.left,
extracted_uinode.border.top,
extracted_uinode.border.right,
extracted_uinode.border.bottom,
],
size: rect_size.xy().into(),
});
}
for &i in &QUAD_INDICES {
ui_meta.indices.push(indices_index + i as u32);
}
vertices_index += 6;
indices_index += 4;
existing_batch.unwrap().1.range.end = vertices_index;
ui_phase.items[batch_item_index].batch_range_mut().end += 1;
} else {
@ -1022,10 +1181,11 @@ pub fn prepare_uinodes(
}
}
}
ui_meta.vertices.write_buffer(&render_device, &render_queue);
ui_meta.indices.write_buffer(&render_device, &render_queue);
*previous_len = batches.len();
commands.insert_or_spawn_batch(batches);
}
extracted_uinodes.uinodes.clear();
extracted_uinodes.clear();
}