Directly copy data into uniform buffers (#9865)

# Objective
This is a minimally disruptive version of #8340. I attempted to update
it, but failed due to the scope of the changes added in #8204.

Fixes #8307. Partially addresses #4642. As seen in
https://github.com/bevyengine/bevy/issues/8284, we're actually copying
data twice in Prepare stage systems: once into a CPU-side intermediate
scratch buffer, and once again into a mapped buffer. This is inefficient
and effectively doubles the time spent and memory allocated to run these
systems.

## Solution
Skip the scratch buffer entirely and use
`wgpu::Queue::write_buffer_with` to directly write data into mapped
buffers.

Separately, this also directly uses
`wgpu::Limits::min_uniform_buffer_offset_alignment` to set up the
alignment when writing to the buffers, partially addressing the issue
raised in #4642.

Storage buffers and the abstractions built on top of
`DynamicUniformBuffer` will need to come in followup PRs.

This may not have a noticeable performance difference in this PR, as the
only first-party systems affected by this are view-related and are
unlikely to be particularly heavy.

---

## Changelog
Added: `DynamicUniformBuffer::get_writer`.
Added: `DynamicUniformBufferWriter`.
This commit is contained in:
James Liu 2023-09-25 12:15:37 -07:00 committed by GitHub
parent 35de5e608e
commit 12032cd296
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 154 additions and 40 deletions

View file

@ -667,9 +667,16 @@ pub fn prepare_previous_view_projection_uniforms(
With<MotionVectorPrepass>,
>,
) {
view_uniforms.uniforms.clear();
for (entity, camera, maybe_previous_view_proj) in &views {
let views_iter = views.iter();
let view_count = views_iter.len();
let Some(mut writer) =
view_uniforms
.uniforms
.get_writer(view_count, &render_device, &render_queue)
else {
return;
};
for (entity, camera, maybe_previous_view_proj) in views_iter {
let view_projection = match maybe_previous_view_proj {
Some(previous_view) => previous_view.clone(),
None => PreviousViewProjection {
@ -679,13 +686,9 @@ pub fn prepare_previous_view_projection_uniforms(
commands
.entity(entity)
.insert(PreviousViewProjectionUniformOffset {
offset: view_uniforms.uniforms.push(view_projection),
offset: writer.write(&view_projection),
});
}
view_uniforms
.uniforms
.write_buffer(&render_device, &render_queue);
}
#[derive(Default, Resource)]

View file

@ -52,9 +52,15 @@ pub fn prepare_fog(
mut fog_meta: ResMut<FogMeta>,
views: Query<(Entity, Option<&FogSettings>), With<ExtractedView>>,
) {
fog_meta.gpu_fogs.clear();
for (entity, fog) in &views {
let views_iter = views.iter();
let view_count = views_iter.len();
let Some(mut writer) = fog_meta
.gpu_fogs
.get_writer(view_count, &render_device, &render_queue)
else {
return;
};
for (entity, fog) in views_iter {
let gpu_fog = if let Some(fog) = fog {
match &fog.falloff {
FogFalloff::Linear { start, end } => GpuFog {
@ -103,13 +109,9 @@ pub fn prepare_fog(
// This is later read by `SetMeshViewBindGroup<I>`
commands.entity(entity).insert(ViewFogUniformOffset {
offset: fog_meta.gpu_fogs.push(gpu_fog),
offset: writer.write(&gpu_fog),
});
}
fog_meta
.gpu_fogs
.write_buffer(&render_device, &render_queue);
}
/// Inserted on each `Entity` with an `ExtractedView` to keep track of its offset

View file

@ -667,7 +667,15 @@ pub fn prepare_lights(
point_lights: Query<(Entity, &ExtractedPointLight)>,
directional_lights: Query<(Entity, &ExtractedDirectionalLight)>,
) {
light_meta.view_gpu_lights.clear();
let views_iter = views.iter();
let views_count = views_iter.len();
let Some(mut view_gpu_lights_writer) =
light_meta
.view_gpu_lights
.get_writer(views_count, &render_device, &render_queue)
else {
return;
};
// Pre-calculate for PointLights
let cube_face_projection =
@ -1198,14 +1206,10 @@ pub fn prepare_lights(
lights: view_lights,
},
ViewLightsUniformOffset {
offset: light_meta.view_gpu_lights.push(gpu_lights),
offset: view_gpu_lights_writer.write(&gpu_lights),
},
));
}
light_meta
.view_gpu_lights
.write_buffer(&render_device, &render_queue);
}
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl

View file

@ -132,24 +132,27 @@ fn prepare_uniform_components<C: Component>(
) where
C: ShaderType + WriteInto + Clone,
{
component_uniforms.uniforms.clear();
let entities = components
.iter()
let components_iter = components.iter();
let count = components_iter.len();
let Some(mut writer) =
component_uniforms
.uniforms
.get_writer(count, &render_device, &render_queue)
else {
return;
};
let entities = components_iter
.map(|(entity, component)| {
(
entity,
DynamicUniformIndex::<C> {
index: component_uniforms.uniforms.push(component.clone()),
index: writer.write(component),
marker: PhantomData,
},
)
})
.collect::<Vec<_>>();
commands.insert_or_spawn_batch(entities);
component_uniforms
.uniforms
.write_buffer(&render_device, &render_queue);
}
/// This plugin extracts the components into the "render world".

View file

@ -1,14 +1,17 @@
use std::marker::PhantomData;
use std::{marker::PhantomData, num::NonZeroU64};
use crate::{
render_resource::Buffer,
renderer::{RenderDevice, RenderQueue},
};
use encase::{
internal::WriteInto, DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType,
internal::{AlignmentValue, BufferMut, WriteInto},
DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType,
UniformBuffer as UniformBufferWrapper,
};
use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsages};
use wgpu::{
util::BufferInitDescriptor, BindingResource, BufferBinding, BufferDescriptor, BufferUsages,
};
/// Stores data to be transferred to the GPU and made accessible to shaders as a uniform buffer.
///
@ -240,6 +243,67 @@ impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
self.changed = true;
}
/// Creates a writer that can be used to directly write elements into the target buffer.
///
/// This method uses less memory and performs fewer memory copies than using [`push`] and
/// [`write_buffer`].
///
/// `max_count` *must* be greater than or equal to the number of elements that are to be written to the buffer, or
/// the writer will panic while writing. Dropping the writer will schedule the buffer write into the provided
/// [`RenderQueue`](crate::renderer::RenderQueue).
///
/// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
/// allocated does not have enough capacity to hold `max_count` elements, a new GPU-side buffer is created.
/// A pending change to the buffer configuration also forces a new buffer, but only when `max_count` is
/// non-zero, so that an existing allocation is never replaced by a zero-sized one.
///
/// Returns `None` if there is no allocated GPU-side buffer and `max_count` is 0, or if the allocated
/// buffer has a size of zero.
///
/// # Panics
///
/// Panics if the aligned size of `max_count` elements overflows a `u64`, or if the queue's
/// `write_buffer_with` returns `None` for the target buffer.
///
/// [`push`]: Self::push
/// [`write_buffer`]: Self::write_buffer
#[inline]
pub fn get_writer<'a>(
    &'a mut self,
    max_count: usize,
    device: &RenderDevice,
    queue: &'a RenderQueue,
) -> Option<DynamicUniformBufferWriter<'a, T>> {
    // Every element is placed at an offset that is a multiple of the device's dynamic-offset alignment.
    let alignment =
        AlignmentValue::new(device.limits().min_uniform_buffer_offset_alignment as u64);

    let mut capacity = self.buffer.as_deref().map(wgpu::Buffer::size).unwrap_or(0);
    let size = alignment
        .round_up(T::min_size().get())
        .checked_mul(max_count as u64)
        .expect("aligned uniform buffer size overflowed u64");

    // Reallocate when the current buffer is too small, or when a change is pending and there is
    // actually something to allocate. Without the `size > 0` guard a pending change combined with
    // `max_count == 0` would throw away a usable buffer in favor of a zero-sized one.
    if capacity < size || (self.changed && size > 0) {
        let buffer = device.create_buffer(&BufferDescriptor {
            label: self.label.as_deref(),
            usage: self.buffer_usage,
            size,
            mapped_at_creation: false,
        });
        capacity = buffer.size();
        self.buffer = Some(buffer);
        self.changed = false;
    }

    let buffer = self.buffer.as_deref()?;
    // A zero-sized buffer cannot be written through a view; bail out with `None` in that case.
    let view_size = NonZeroU64::new(buffer.size())?;
    let buffer_view = queue
        .write_buffer_with(buffer, 0, view_size)
        .expect("failed to obtain a write view for the uniform buffer");

    Some(DynamicUniformBufferWriter {
        buffer: encase::DynamicUniformBuffer::new_with_alignment(
            QueueWriteBufferViewWrapper {
                capacity: capacity as usize,
                buffer_view,
            },
            alignment.get(),
        ),
        _marker: PhantomData,
    })
}
/// Queues writing of data from system RAM to VRAM using the [`RenderDevice`](crate::renderer::RenderDevice)
/// and the provided [`RenderQueue`](crate::renderer::RenderQueue).
///
@ -268,3 +332,38 @@ impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
self.scratch.set_offset(0);
}
}
/// A writer that can be used to directly write elements into the target buffer.
///
/// Values are written through an `encase` dynamic uniform buffer that is layered over a
/// [`wgpu::QueueWriteBufferView`] wrapper rather than over an intermediate byte vector.
///
/// For more information, see [`DynamicUniformBuffer::get_writer`].
pub struct DynamicUniformBufferWriter<'a, T> {
// `encase` writer whose backing storage is the wrapped queue write view.
buffer: encase::DynamicUniformBuffer<QueueWriteBufferViewWrapper<'a>>,
// Records the element type `T` at the type level; no `T` value is stored.
_marker: PhantomData<fn() -> T>,
}
impl<'a, T: ShaderType + WriteInto> DynamicUniformBufferWriter<'a, T> {
    /// Writes `value` into the underlying buffer and returns the offset it was written at.
    ///
    /// # Panics
    ///
    /// Panics if the underlying `encase` write reports an error.
    pub fn write(&mut self, value: &T) -> u32 {
        let offset = self.buffer.write(value).unwrap();
        // NOTE(review): plain `as` cast, kept from the original; presumably offsets always fit
        // in a `u32` for uniform buffers — confirm.
        offset as u32
    }
}
/// A wrapper to work around the orphan rule so that [`wgpu::QueueWriteBufferView`] can implement
/// [`encase::internal::BufferMut`].
struct QueueWriteBufferViewWrapper<'a> {
// The queue-provided view that receives the written bytes.
buffer_view: wgpu::QueueWriteBufferView<'a>,
// Size of the target buffer in bytes, recorded when the wrapper is constructed.
// Must be kept separately and cannot be retrieved from buffer_view, as the read-only access will
// invoke a panic.
capacity: usize,
}
impl<'a> BufferMut for QueueWriteBufferViewWrapper<'a> {
/// Returns the capacity stored in the wrapper, without touching `buffer_view`.
#[inline]
fn capacity(&self) -> usize {
self.capacity
}
/// Forwards the `N` bytes in `val` to `buffer_view` at byte position `offset`.
// NOTE(review): presumably this resolves to the byte-slice `BufferMut::write` through the
// view's mutable deref — confirm against the `encase` and `wgpu` versions in use.
#[inline]
fn write<const N: usize>(&mut self, offset: usize, val: &[u8; N]) {
self.buffer_view.write(offset, val);
}
}

View file

@ -356,8 +356,15 @@ pub fn prepare_view_uniforms(
Option<&MipBias>,
)>,
) {
view_uniforms.uniforms.clear();
let view_iter = views.iter();
let view_count = view_iter.len();
let Some(mut writer) =
view_uniforms
.uniforms
.get_writer(view_count, &render_device, &render_queue)
else {
return;
};
for (entity, camera, temporal_jitter, mip_bias) in &views {
let viewport = camera.viewport.as_vec4();
let unjittered_projection = camera.projection;
@ -380,7 +387,7 @@ pub fn prepare_view_uniforms(
};
let view_uniforms = ViewUniformOffset {
offset: view_uniforms.uniforms.push(ViewUniform {
offset: writer.write(&ViewUniform {
view_proj,
unjittered_view_proj: unjittered_projection * inverse_view,
inverse_view_proj: view * inverse_projection,
@ -397,10 +404,6 @@ pub fn prepare_view_uniforms(
commands.entity(entity).insert(view_uniforms);
}
view_uniforms
.uniforms
.write_buffer(&render_device, &render_queue);
}
#[derive(Clone)]