mirror of
https://github.com/bevyengine/bevy
synced 2024-11-22 04:33:37 +00:00
Directly copy data into uniform buffers (#9865)
# Objective This is a minimally disruptive version of #8340. I attempted to update it, but failed due to the scope of the changes added in #8204. Fixes #8307. Partially addresses #4642. As seen in https://github.com/bevyengine/bevy/issues/8284, we're actually copying data twice in Prepare stage systems. Once into a CPU-side intermediate scratch buffer, and once again into a mapped buffer. This is inefficient and effectively doubles the time spent and memory allocated to run these systems. ## Solution Skip the scratch buffer entirely and use `wgpu::Queue::write_buffer_with` to directly write data into mapped buffers. Separately, this also directly uses `wgpu::Limits::min_uniform_buffer_offset_alignment` to set up the alignment when writing to the buffers. Partially addressing the issue raised in #4642. Storage buffers and the abstractions built on top of `DynamicUniformBuffer` will need to come in followup PRs. This may not have a noticeable performance difference in this PR, as the only first-party systems affected by this are view related, and likely are not going to be particularly heavy. --- ## Changelog Added: `DynamicUniformBuffer::get_writer`. Added: `DynamicUniformBufferWriter`.
This commit is contained in:
parent
35de5e608e
commit
12032cd296
6 changed files with 154 additions and 40 deletions
|
@ -667,9 +667,16 @@ pub fn prepare_previous_view_projection_uniforms(
|
|||
With<MotionVectorPrepass>,
|
||||
>,
|
||||
) {
|
||||
view_uniforms.uniforms.clear();
|
||||
|
||||
for (entity, camera, maybe_previous_view_proj) in &views {
|
||||
let views_iter = views.iter();
|
||||
let view_count = views_iter.len();
|
||||
let Some(mut writer) =
|
||||
view_uniforms
|
||||
.uniforms
|
||||
.get_writer(view_count, &render_device, &render_queue)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
for (entity, camera, maybe_previous_view_proj) in views_iter {
|
||||
let view_projection = match maybe_previous_view_proj {
|
||||
Some(previous_view) => previous_view.clone(),
|
||||
None => PreviousViewProjection {
|
||||
|
@ -679,13 +686,9 @@ pub fn prepare_previous_view_projection_uniforms(
|
|||
commands
|
||||
.entity(entity)
|
||||
.insert(PreviousViewProjectionUniformOffset {
|
||||
offset: view_uniforms.uniforms.push(view_projection),
|
||||
offset: writer.write(&view_projection),
|
||||
});
|
||||
}
|
||||
|
||||
view_uniforms
|
||||
.uniforms
|
||||
.write_buffer(&render_device, &render_queue);
|
||||
}
|
||||
|
||||
#[derive(Default, Resource)]
|
||||
|
|
|
@ -52,9 +52,15 @@ pub fn prepare_fog(
|
|||
mut fog_meta: ResMut<FogMeta>,
|
||||
views: Query<(Entity, Option<&FogSettings>), With<ExtractedView>>,
|
||||
) {
|
||||
fog_meta.gpu_fogs.clear();
|
||||
|
||||
for (entity, fog) in &views {
|
||||
let views_iter = views.iter();
|
||||
let view_count = views_iter.len();
|
||||
let Some(mut writer) = fog_meta
|
||||
.gpu_fogs
|
||||
.get_writer(view_count, &render_device, &render_queue)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
for (entity, fog) in views_iter {
|
||||
let gpu_fog = if let Some(fog) = fog {
|
||||
match &fog.falloff {
|
||||
FogFalloff::Linear { start, end } => GpuFog {
|
||||
|
@ -103,13 +109,9 @@ pub fn prepare_fog(
|
|||
|
||||
// This is later read by `SetMeshViewBindGroup<I>`
|
||||
commands.entity(entity).insert(ViewFogUniformOffset {
|
||||
offset: fog_meta.gpu_fogs.push(gpu_fog),
|
||||
offset: writer.write(&gpu_fog),
|
||||
});
|
||||
}
|
||||
|
||||
fog_meta
|
||||
.gpu_fogs
|
||||
.write_buffer(&render_device, &render_queue);
|
||||
}
|
||||
|
||||
/// Inserted on each `Entity` with an `ExtractedView` to keep track of its offset
|
||||
|
|
|
@ -667,7 +667,15 @@ pub fn prepare_lights(
|
|||
point_lights: Query<(Entity, &ExtractedPointLight)>,
|
||||
directional_lights: Query<(Entity, &ExtractedDirectionalLight)>,
|
||||
) {
|
||||
light_meta.view_gpu_lights.clear();
|
||||
let views_iter = views.iter();
|
||||
let views_count = views_iter.len();
|
||||
let Some(mut view_gpu_lights_writer) =
|
||||
light_meta
|
||||
.view_gpu_lights
|
||||
.get_writer(views_count, &render_device, &render_queue)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
||||
// Pre-calculate for PointLights
|
||||
let cube_face_projection =
|
||||
|
@ -1198,14 +1206,10 @@ pub fn prepare_lights(
|
|||
lights: view_lights,
|
||||
},
|
||||
ViewLightsUniformOffset {
|
||||
offset: light_meta.view_gpu_lights.push(gpu_lights),
|
||||
offset: view_gpu_lights_writer.write(&gpu_lights),
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
light_meta
|
||||
.view_gpu_lights
|
||||
.write_buffer(&render_device, &render_queue);
|
||||
}
|
||||
|
||||
// this must match CLUSTER_COUNT_SIZE in pbr.wgsl
|
||||
|
|
|
@ -132,24 +132,27 @@ fn prepare_uniform_components<C: Component>(
|
|||
) where
|
||||
C: ShaderType + WriteInto + Clone,
|
||||
{
|
||||
component_uniforms.uniforms.clear();
|
||||
let entities = components
|
||||
.iter()
|
||||
let components_iter = components.iter();
|
||||
let count = components_iter.len();
|
||||
let Some(mut writer) =
|
||||
component_uniforms
|
||||
.uniforms
|
||||
.get_writer(count, &render_device, &render_queue)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
let entities = components_iter
|
||||
.map(|(entity, component)| {
|
||||
(
|
||||
entity,
|
||||
DynamicUniformIndex::<C> {
|
||||
index: component_uniforms.uniforms.push(component.clone()),
|
||||
index: writer.write(component),
|
||||
marker: PhantomData,
|
||||
},
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
commands.insert_or_spawn_batch(entities);
|
||||
|
||||
component_uniforms
|
||||
.uniforms
|
||||
.write_buffer(&render_device, &render_queue);
|
||||
}
|
||||
|
||||
/// This plugin extracts the components into the "render world".
|
||||
|
|
|
@ -1,14 +1,17 @@
|
|||
use std::marker::PhantomData;
|
||||
use std::{marker::PhantomData, num::NonZeroU64};
|
||||
|
||||
use crate::{
|
||||
render_resource::Buffer,
|
||||
renderer::{RenderDevice, RenderQueue},
|
||||
};
|
||||
use encase::{
|
||||
internal::WriteInto, DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType,
|
||||
internal::{AlignmentValue, BufferMut, WriteInto},
|
||||
DynamicUniformBuffer as DynamicUniformBufferWrapper, ShaderType,
|
||||
UniformBuffer as UniformBufferWrapper,
|
||||
};
|
||||
use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsages};
|
||||
use wgpu::{
|
||||
util::BufferInitDescriptor, BindingResource, BufferBinding, BufferDescriptor, BufferUsages,
|
||||
};
|
||||
|
||||
/// Stores data to be transferred to the GPU and made accessible to shaders as a uniform buffer.
|
||||
///
|
||||
|
@ -240,6 +243,67 @@ impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
|
|||
self.changed = true;
|
||||
}
|
||||
|
||||
/// Creates a writer that can be used to directly write elements into the target buffer.
|
||||
///
|
||||
/// This method uses less memory and performs fewer memory copies using over [`push`] and [`write_buffer`].
|
||||
///
|
||||
/// `max_count` *must* be greater than or equal to the number of elements that are to be written to the buffer, or
|
||||
/// the writer will panic while writing. Dropping the writer will schedule the buffer write into the provided
|
||||
/// [`RenderQueue`](crate::renderer::RenderQueue).
|
||||
///
|
||||
/// If there is no GPU-side buffer allocated to hold the data currently stored, or if a GPU-side buffer previously
|
||||
/// allocated does not have enough capacity to hold `max_count` elements, a new GPU-side buffer is created.
|
||||
///
|
||||
/// Returns `None` if there is no allocated GPU-side buffer, and `max_count` is 0.
|
||||
///
|
||||
/// [`push`]: Self::push
|
||||
/// [`write_buffer`]: Self::write_buffer
|
||||
#[inline]
|
||||
pub fn get_writer<'a>(
|
||||
&'a mut self,
|
||||
max_count: usize,
|
||||
device: &RenderDevice,
|
||||
queue: &'a RenderQueue,
|
||||
) -> Option<DynamicUniformBufferWriter<'a, T>> {
|
||||
let alignment =
|
||||
AlignmentValue::new(device.limits().min_uniform_buffer_offset_alignment as u64);
|
||||
let mut capacity = self.buffer.as_deref().map(wgpu::Buffer::size).unwrap_or(0);
|
||||
let size = alignment
|
||||
.round_up(T::min_size().get())
|
||||
.checked_mul(max_count as u64)
|
||||
.unwrap();
|
||||
|
||||
if capacity < size || self.changed {
|
||||
let buffer = device.create_buffer(&BufferDescriptor {
|
||||
label: self.label.as_deref(),
|
||||
usage: self.buffer_usage,
|
||||
size,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
capacity = buffer.size();
|
||||
self.buffer = Some(buffer);
|
||||
self.changed = false;
|
||||
}
|
||||
|
||||
if let Some(buffer) = self.buffer.as_deref() {
|
||||
let buffer_view = queue
|
||||
.write_buffer_with(buffer, 0, NonZeroU64::new(buffer.size())?)
|
||||
.unwrap();
|
||||
Some(DynamicUniformBufferWriter {
|
||||
buffer: encase::DynamicUniformBuffer::new_with_alignment(
|
||||
QueueWriteBufferViewWrapper {
|
||||
capacity: capacity as usize,
|
||||
buffer_view,
|
||||
},
|
||||
alignment.get(),
|
||||
),
|
||||
_marker: PhantomData,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Queues writing of data from system RAM to VRAM using the [`RenderDevice`](crate::renderer::RenderDevice)
|
||||
/// and the provided [`RenderQueue`](crate::renderer::RenderQueue).
|
||||
///
|
||||
|
@ -268,3 +332,38 @@ impl<T: ShaderType + WriteInto> DynamicUniformBuffer<T> {
|
|||
self.scratch.set_offset(0);
|
||||
}
|
||||
}
|
||||
|
||||
/// A writer that can be used to directly write elements into the target buffer.
|
||||
///
|
||||
/// For more information, see [`DynamicUniformBuffer::get_writer`].
|
||||
pub struct DynamicUniformBufferWriter<'a, T> {
|
||||
buffer: encase::DynamicUniformBuffer<QueueWriteBufferViewWrapper<'a>>,
|
||||
_marker: PhantomData<fn() -> T>,
|
||||
}
|
||||
|
||||
impl<'a, T: ShaderType + WriteInto> DynamicUniformBufferWriter<'a, T> {
|
||||
pub fn write(&mut self, value: &T) -> u32 {
|
||||
self.buffer.write(value).unwrap() as u32
|
||||
}
|
||||
}
|
||||
|
||||
/// A wrapper to work around the orphan rule so that [`wgpu::QueueWriteBufferView`] can implement
|
||||
/// [`encase::internal::BufferMut`].
|
||||
struct QueueWriteBufferViewWrapper<'a> {
|
||||
buffer_view: wgpu::QueueWriteBufferView<'a>,
|
||||
// Must be kept separately and cannot be retrieved from buffer_view, as the read-only access will
|
||||
// invoke a panic.
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl<'a> BufferMut for QueueWriteBufferViewWrapper<'a> {
|
||||
#[inline]
|
||||
fn capacity(&self) -> usize {
|
||||
self.capacity
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write<const N: usize>(&mut self, offset: usize, val: &[u8; N]) {
|
||||
self.buffer_view.write(offset, val);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -356,8 +356,15 @@ pub fn prepare_view_uniforms(
|
|||
Option<&MipBias>,
|
||||
)>,
|
||||
) {
|
||||
view_uniforms.uniforms.clear();
|
||||
|
||||
let view_iter = views.iter();
|
||||
let view_count = view_iter.len();
|
||||
let Some(mut writer) =
|
||||
view_uniforms
|
||||
.uniforms
|
||||
.get_writer(view_count, &render_device, &render_queue)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
for (entity, camera, temporal_jitter, mip_bias) in &views {
|
||||
let viewport = camera.viewport.as_vec4();
|
||||
let unjittered_projection = camera.projection;
|
||||
|
@ -380,7 +387,7 @@ pub fn prepare_view_uniforms(
|
|||
};
|
||||
|
||||
let view_uniforms = ViewUniformOffset {
|
||||
offset: view_uniforms.uniforms.push(ViewUniform {
|
||||
offset: writer.write(&ViewUniform {
|
||||
view_proj,
|
||||
unjittered_view_proj: unjittered_projection * inverse_view,
|
||||
inverse_view_proj: view * inverse_projection,
|
||||
|
@ -397,10 +404,6 @@ pub fn prepare_view_uniforms(
|
|||
|
||||
commands.entity(entity).insert(view_uniforms);
|
||||
}
|
||||
|
||||
view_uniforms
|
||||
.uniforms
|
||||
.write_buffer(&render_device, &render_queue);
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
|
Loading…
Reference in a new issue