Add BufferVec, a higher-performance alternative to StorageBuffer, and make GpuArrayBuffer use it. (#13199)

This is an adoption of #12670 plus some documentation fixes. See that PR
for more details.

---

## Changelog

* Renamed `BufferVec` to `RawBufferVec` and added a new `BufferVec`
type.

## Migration Guide
`BufferVec` has been renamed to `RawBufferVec` and a new similar type
has taken the `BufferVec` name.

---------

Co-authored-by: Patrick Walton <pcwalton@mimiga.net>
Co-authored-by: Alice Cecile <alice.i.cecile@gmail.com>
Co-authored-by: IceSentry <IceSentry@users.noreply.github.com>
This commit is contained in:
Kristoffer Søholm 2024-05-03 13:39:21 +02:00 committed by GitHub
parent 64c1c65783
commit 2089a28717
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 220 additions and 51 deletions

View file

@ -331,7 +331,7 @@ pub struct MeshCullingData {
/// To avoid wasting CPU time in the CPU culling case, this buffer will be empty
/// if GPU culling isn't in use.
#[derive(Resource, Deref, DerefMut)]
pub struct MeshCullingDataBuffer(BufferVec<MeshCullingData>);
pub struct MeshCullingDataBuffer(RawBufferVec<MeshCullingData>);
impl MeshUniform {
pub fn new(mesh_transforms: &MeshTransforms, maybe_lightmap_uv_rect: Option<Rect>) -> Self {
@ -685,7 +685,7 @@ impl RenderMeshInstanceGpuBuilder {
self,
entity: Entity,
render_mesh_instances: &mut EntityHashMap<RenderMeshInstanceGpu>,
current_input_buffer: &mut BufferVec<MeshInputUniform>,
current_input_buffer: &mut RawBufferVec<MeshInputUniform>,
) -> usize {
// Push the mesh input uniform.
let current_uniform_index = current_input_buffer.push(MeshInputUniform {
@ -742,7 +742,7 @@ impl MeshCullingData {
impl Default for MeshCullingDataBuffer {
#[inline]
fn default() -> Self {
Self(BufferVec::new(BufferUsages::STORAGE))
Self(RawBufferVec::new(BufferUsages::STORAGE))
}
}

View file

@ -6,7 +6,7 @@ use bevy_ecs::prelude::*;
use bevy_render::{
batching::NoAutomaticBatching,
mesh::morph::{MeshMorphWeights, MAX_MORPH_WEIGHTS},
render_resource::{BufferUsages, BufferVec},
render_resource::{BufferUsages, RawBufferVec},
renderer::{RenderDevice, RenderQueue},
view::ViewVisibility,
Extract,
@ -23,13 +23,13 @@ pub struct MorphIndices(EntityHashMap<MorphIndex>);
#[derive(Resource)]
pub struct MorphUniform {
pub buffer: BufferVec<f32>,
pub buffer: RawBufferVec<f32>,
}
impl Default for MorphUniform {
fn default() -> Self {
Self {
buffer: BufferVec::new(BufferUsages::UNIFORM),
buffer: RawBufferVec::new(BufferUsages::UNIFORM),
}
}
}
@ -53,14 +53,14 @@ const fn can_align(step: usize, target: usize) -> bool {
const WGPU_MIN_ALIGN: usize = 256;
/// Align a [`BufferVec`] to `N` bytes by padding the end with `T::default()` values.
fn add_to_alignment<T: NoUninit + Default>(buffer: &mut BufferVec<T>) {
/// Align a [`RawBufferVec`] to `N` bytes by padding the end with `T::default()` values.
fn add_to_alignment<T: NoUninit + Default>(buffer: &mut RawBufferVec<T>) {
let n = WGPU_MIN_ALIGN;
let t_size = mem::size_of::<T>();
if !can_align(n, t_size) {
// This panic is stripped at compile time, due to n, t_size and can_align being const
panic!(
"BufferVec should contain only types with a size multiple or divisible by {n}, \
"RawBufferVec should contain only types with a size multiple or divisible by {n}, \
{} has a size of {t_size}, which is neither multiple or divisible by {n}",
std::any::type_name::<T>()
);

View file

@ -6,7 +6,7 @@ use bevy_math::Mat4;
use bevy_render::{
batching::NoAutomaticBatching,
mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
render_resource::{BufferUsages, BufferVec},
render_resource::{BufferUsages, RawBufferVec},
renderer::{RenderDevice, RenderQueue},
view::ViewVisibility,
Extract,
@ -36,13 +36,13 @@ pub struct SkinIndices(EntityHashMap<SkinIndex>);
// Notes on implementation: see comment on top of the `extract_skins` system.
#[derive(Resource)]
pub struct SkinUniform {
pub buffer: BufferVec<Mat4>,
pub buffer: RawBufferVec<Mat4>,
}
impl Default for SkinUniform {
fn default() -> Self {
Self {
buffer: BufferVec::new(BufferUsages::UNIFORM),
buffer: RawBufferVec::new(BufferUsages::UNIFORM),
}
}
}

View file

@ -21,7 +21,7 @@ use crate::{
BinnedPhaseItem, BinnedRenderPhase, BinnedRenderPhaseBatch, CachedRenderPipelinePhaseItem,
PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
},
render_resource::{BufferVec, GpuArrayBufferable, UninitBufferVec},
render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
renderer::{RenderAdapter, RenderDevice, RenderQueue},
view::{GpuCulling, ViewTarget},
Render, RenderApp, RenderSet,
@ -101,7 +101,7 @@ where
/// The uniform data inputs for the current frame.
///
/// These are uploaded during the extraction phase.
pub current_input_buffer: BufferVec<BDI>,
pub current_input_buffer: RawBufferVec<BDI>,
/// The uniform data inputs for the previous frame.
///
@ -110,7 +110,7 @@ where
/// can spawn or despawn between frames. Instead, each current buffer
/// data input uniform is expected to contain the index of the
/// corresponding buffer data input uniform in this list.
pub previous_input_buffer: BufferVec<BDI>,
pub previous_input_buffer: RawBufferVec<BDI>,
}
/// The buffer of GPU preprocessing work items for a single view.
@ -247,8 +247,8 @@ where
BatchedInstanceBuffers {
data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
work_item_buffers: EntityHashMap::default(),
current_input_buffer: BufferVec::new(BufferUsages::STORAGE),
previous_input_buffer: BufferVec::new(BufferUsages::STORAGE),
current_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
previous_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
}
}

View file

@ -1,11 +1,15 @@
use std::marker::PhantomData;
use std::{iter, marker::PhantomData};
use crate::{
render_resource::Buffer,
renderer::{RenderDevice, RenderQueue},
};
use bytemuck::{must_cast_slice, NoUninit};
use wgpu::BufferUsages;
use encase::{
internal::{WriteInto, Writer},
ShaderType,
};
use wgpu::{BufferAddress, BufferUsages};
use super::GpuArrayBufferable;
@ -19,9 +23,9 @@ use super::GpuArrayBufferable;
/// Index, vertex, and instance-rate vertex buffers have no alignment nor padding requirements and
/// so this helper type is a good choice for them.
///
/// The contained data is stored in system RAM. Calling [`reserve`](BufferVec::reserve)
/// The contained data is stored in system RAM. Calling [`reserve`](RawBufferVec::reserve)
/// allocates VRAM from the [`RenderDevice`].
/// [`write_buffer`](BufferVec::write_buffer) queues copying of the data
/// [`write_buffer`](RawBufferVec::write_buffer) queues copying of the data
/// from system RAM to VRAM.
///
/// Other options for storing GPU-accessible data are:
@ -32,7 +36,7 @@ use super::GpuArrayBufferable;
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`BufferVec`]
/// * [`Texture`](crate::render_resource::Texture)
pub struct BufferVec<T: NoUninit> {
pub struct RawBufferVec<T: NoUninit> {
values: Vec<T>,
buffer: Option<Buffer>,
capacity: usize,
@ -42,7 +46,7 @@ pub struct BufferVec<T: NoUninit> {
label_changed: bool,
}
impl<T: NoUninit> BufferVec<T> {
impl<T: NoUninit> RawBufferVec<T> {
pub const fn new(buffer_usage: BufferUsages) -> Self {
Self {
values: Vec::new(),
@ -81,7 +85,7 @@ impl<T: NoUninit> BufferVec<T> {
index
}
pub fn append(&mut self, other: &mut BufferVec<T>) {
pub fn append(&mut self, other: &mut RawBufferVec<T>) {
self.values.append(&mut other.values);
}
@ -108,7 +112,7 @@ impl<T: NoUninit> BufferVec<T> {
/// once it is done using them (typically 1-2 frames).
///
/// In addition to any [`BufferUsages`] provided when
/// the `BufferVec` was created, the buffer on the [`RenderDevice`]
/// the `RawBufferVec` was created, the buffer on the [`RenderDevice`]
/// is marked as [`BufferUsages::COPY_DST`](BufferUsages).
pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
if capacity > self.capacity || self.label_changed {
@ -116,7 +120,7 @@ impl<T: NoUninit> BufferVec<T> {
let size = self.item_size * capacity;
self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor {
label: self.label.as_deref(),
size: size as wgpu::BufferAddress,
size: size as BufferAddress,
usage: BufferUsages::COPY_DST | self.buffer_usage,
mapped_at_creation: false,
}));
@ -127,7 +131,7 @@ impl<T: NoUninit> BufferVec<T> {
/// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
/// and the provided [`RenderQueue`].
///
/// Before queuing the write, a [`reserve`](BufferVec::reserve) operation
/// Before queuing the write, a [`reserve`](RawBufferVec::reserve) operation
/// is executed.
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
if self.values.is_empty() {
@ -158,20 +162,177 @@ impl<T: NoUninit> BufferVec<T> {
}
}
impl<T: NoUninit> Extend<T> for BufferVec<T> {
impl<T: NoUninit> Extend<T> for RawBufferVec<T> {
#[inline]
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
self.values.extend(iter);
}
}
/// Like [`RawBufferVec`], but doesn't require that the data type `T` be
/// [`NoUninit`].
///
/// This is a high-performance data structure that you should use whenever
/// possible if your data is more complex than is suitable for [`RawBufferVec`].
/// The [`ShaderType`] trait from the `encase` library is used to ensure that
/// the data is correctly aligned for use by the GPU.
///
/// For performance reasons, unlike [`RawBufferVec`], this type doesn't allow
/// CPU access to the data after it's been added via [`BufferVec::push`]. If you
/// need CPU access to the data, consider another type, such as
/// [`StorageBuffer`](crate::render_resource::StorageBuffer).
pub struct BufferVec<T>
where
T: ShaderType + WriteInto,
{
/// CPU-side staging bytes. Every pushed element is serialized into
/// exactly `T::min_size()` bytes appended to the end of this vector.
data: Vec<u8>,
/// The GPU buffer, or `None` until `reserve` has created one.
buffer: Option<Buffer>,
/// Number of elements (not bytes) the current GPU buffer was sized for.
capacity: usize,
/// Usages requested at construction; `COPY_DST` is added when the GPU
/// buffer is created.
buffer_usage: BufferUsages,
/// Optional debugging label passed to the buffer descriptor.
label: Option<String>,
/// Set when the label differs from the one the GPU buffer was created
/// with; forces `reserve` to recreate the buffer.
label_changed: bool,
/// Ties the byte storage to the element type `T` without storing a `T`.
phantom: PhantomData<T>,
}
impl<T> BufferVec<T>
where
    T: ShaderType + WriteInto,
{
    /// Creates a new, empty [`BufferVec`] with the given [`BufferUsages`].
    ///
    /// No GPU buffer is allocated until [`reserve`](BufferVec::reserve) or
    /// [`write_buffer`](BufferVec::write_buffer) is called.
    pub const fn new(buffer_usage: BufferUsages) -> Self {
        Self {
            data: Vec::new(),
            buffer: None,
            capacity: 0,
            buffer_usage,
            label: None,
            label_changed: false,
            phantom: PhantomData,
        }
    }

    /// Size in bytes of one serialized element, i.e. `T::min_size()`.
    ///
    /// Every length, offset and capacity computation in this type is
    /// expressed in multiples of this value.
    #[inline]
    fn item_size() -> usize {
        u64::from(T::min_size()) as usize
    }

    /// Returns a handle to the GPU buffer, if one has been allocated by
    /// [`reserve`](BufferVec::reserve).
    #[inline]
    pub fn buffer(&self) -> Option<&Buffer> {
        self.buffer.as_ref()
    }

    /// Returns the number of elements the GPU buffer can hold before it has
    /// to be reallocated.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Returns the number of items that have been pushed to this buffer.
    #[inline]
    pub fn len(&self) -> usize {
        self.data.len() / Self::item_size()
    }

    /// Returns true if the buffer is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Adds a new value and returns its index.
    pub fn push(&mut self, value: T) -> usize {
        let element_size = Self::item_size();
        let offset = self.data.len();

        // TODO: Consider using unsafe code to push uninitialized, to prevent
        // the zeroing. It shows up in profiles.
        self.data.extend(iter::repeat(0).take(element_size));

        // Take a slice of the new data for `write_into` to use. This is
        // important: it hoists the bounds check up here so that the compiler
        // can eliminate all the bounds checks that `write_into` will emit.
        let mut dest = &mut self.data[offset..(offset + element_size)];
        value.write_into(&mut Writer::new(&value, &mut dest, 0).unwrap());

        // `offset` is always a whole number of elements, so this is the
        // index of the element that was just written.
        offset / element_size
    }

    /// Changes the debugging label of the buffer.
    ///
    /// The next time the buffer is updated (via
    /// [`reserve`](BufferVec::reserve)), Bevy will inform the driver of the
    /// new label.
    pub fn set_label(&mut self, label: Option<&str>) {
        let label = label.map(str::to_string);

        if label != self.label {
            self.label_changed = true;
        }

        self.label = label;
    }

    /// Returns the debugging label, if one has been set.
    pub fn get_label(&self) -> Option<&str> {
        self.label.as_deref()
    }

    /// Creates a [`Buffer`] on the [`RenderDevice`] with room for `capacity`
    /// elements (`T::min_size() * capacity` bytes), unless a buffer that is
    /// already large enough exists and the label has not changed.
    ///
    /// If a [`Buffer`] exists, but is too small, references to it will be discarded,
    /// and a new [`Buffer`] will be created. Any previously created [`Buffer`]s
    /// that are no longer referenced will be deleted by the [`RenderDevice`]
    /// once it is done using them (typically 1-2 frames).
    ///
    /// In addition to any [`BufferUsages`] provided when
    /// the `BufferVec` was created, the buffer on the [`RenderDevice`]
    /// is marked as [`BufferUsages::COPY_DST`](BufferUsages).
    pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
        if capacity <= self.capacity && !self.label_changed {
            return;
        }

        self.capacity = capacity;
        let size = Self::item_size() * capacity;
        self.buffer = Some(device.create_buffer(&wgpu::BufferDescriptor {
            label: self.label.as_deref(),
            size: size as BufferAddress,
            usage: BufferUsages::COPY_DST | self.buffer_usage,
            mapped_at_creation: false,
        }));
        self.label_changed = false;
    }

    /// Queues writing of data from system RAM to VRAM using the [`RenderDevice`]
    /// and the provided [`RenderQueue`].
    ///
    /// Before queuing the write, a [`reserve`](BufferVec::reserve) operation is
    /// executed. Nothing is reserved or written when the buffer is empty.
    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
        if self.data.is_empty() {
            return;
        }

        let len = self.len();
        self.reserve(len, device);

        let Some(buffer) = &self.buffer else { return };
        queue.write_buffer(buffer, 0, &self.data);
    }

    /// Reduces the length of the buffer to at most `len` elements.
    pub fn truncate(&mut self, len: usize) {
        self.data.truncate(Self::item_size() * len);
    }

    /// Removes all elements from the buffer.
    pub fn clear(&mut self) {
        self.data.clear();
    }
}
/// Like a [`BufferVec`], but only reserves space on the GPU for elements
/// instead of initializing them CPU-side.
///
/// This type is useful when you're accumulating "output slots" for a GPU
/// compute shader to write into.
///
/// The type `T` need not be [`NoUninit`], unlike [`BufferVec`]; it only has to
/// The type `T` need not be [`NoUninit`], unlike [`RawBufferVec`]; it only has to
/// be [`GpuArrayBufferable`].
pub struct UninitBufferVec<T>
where

View file

@ -1,6 +1,6 @@
use super::{
binding_types::{storage_buffer_read_only, uniform_buffer_sized},
BindGroupLayoutEntryBuilder, StorageBuffer,
BindGroupLayoutEntryBuilder, BufferVec,
};
use crate::{
render_resource::batched_uniform_buffer::BatchedUniformBuffer,
@ -10,7 +10,7 @@ use bevy_ecs::{prelude::Component, system::Resource};
use encase::{private::WriteInto, ShaderSize, ShaderType};
use nonmax::NonMaxU32;
use std::marker::PhantomData;
use wgpu::BindingResource;
use wgpu::{BindingResource, BufferUsages};
/// Trait for types able to go in a [`GpuArrayBuffer`].
pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {}
@ -18,21 +18,23 @@ impl<T: ShaderType + ShaderSize + WriteInto + Clone> GpuArrayBufferable for T {}
/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array.
///
/// On platforms that support storage buffers, this is equivalent to [`StorageBuffer<Vec<T>>`].
/// Otherwise, this falls back to a dynamic offset uniform buffer with the largest
/// array of T that fits within a uniform buffer binding (within reasonable limits).
/// On platforms that support storage buffers, this is equivalent to
/// [`BufferVec<T>`]. Otherwise, this falls back to a dynamic offset
/// uniform buffer with the largest array of T that fits within a uniform buffer
/// binding (within reasonable limits).
///
/// Other options for storing GPU-accessible data are:
/// * [`StorageBuffer`]
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
#[derive(Resource)]
pub enum GpuArrayBuffer<T: GpuArrayBufferable> {
Uniform(BatchedUniformBuffer<T>),
Storage(StorageBuffer<Vec<T>>),
Storage(BufferVec<T>),
}
impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
@ -41,14 +43,14 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
if limits.max_storage_buffers_per_shader_stage == 0 {
GpuArrayBuffer::Uniform(BatchedUniformBuffer::new(&limits))
} else {
GpuArrayBuffer::Storage(StorageBuffer::default())
GpuArrayBuffer::Storage(BufferVec::new(BufferUsages::STORAGE))
}
}
pub fn clear(&mut self) {
match self {
GpuArrayBuffer::Uniform(buffer) => buffer.clear(),
GpuArrayBuffer::Storage(buffer) => buffer.get_mut().clear(),
GpuArrayBuffer::Storage(buffer) => buffer.clear(),
}
}
@ -56,9 +58,7 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
match self {
GpuArrayBuffer::Uniform(buffer) => buffer.push(value),
GpuArrayBuffer::Storage(buffer) => {
let buffer = buffer.get_mut();
let index = buffer.len() as u32;
buffer.push(value);
let index = buffer.push(value) as u32;
GpuArrayBufferIndex {
index,
dynamic_offset: None,
@ -91,7 +91,9 @@ impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
pub fn binding(&self) -> Option<BindingResource> {
match self {
GpuArrayBuffer::Uniform(buffer) => buffer.binding(),
GpuArrayBuffer::Storage(buffer) => buffer.binding(),
GpuArrayBuffer::Storage(buffer) => {
buffer.buffer().map(|buffer| buffer.as_entire_binding())
}
}
}

View file

@ -24,6 +24,8 @@ use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsa
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
@ -154,6 +156,8 @@ impl<T: ShaderType + WriteInto> StorageBuffer<T> {
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///

View file

@ -31,6 +31,7 @@ use super::IntoBinding;
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
/// * [`DynamicUniformBuffer`]
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
@ -168,6 +169,7 @@ impl<'a, T: ShaderType + WriteInto> IntoBinding<'a> for &'a UniformBuffer<T> {
/// * [`UniformBuffer`]
/// * [`DynamicUniformBuffer`]
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`RawBufferVec`](crate::render_resource::RawBufferVec)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///

View file

@ -413,16 +413,16 @@ impl SpriteInstance {
#[derive(Resource)]
pub struct SpriteMeta {
view_bind_group: Option<BindGroup>,
sprite_index_buffer: BufferVec<u32>,
sprite_instance_buffer: BufferVec<SpriteInstance>,
sprite_index_buffer: RawBufferVec<u32>,
sprite_instance_buffer: RawBufferVec<SpriteInstance>,
}
impl Default for SpriteMeta {
fn default() -> Self {
Self {
view_bind_group: None,
sprite_index_buffer: BufferVec::<u32>::new(BufferUsages::INDEX),
sprite_instance_buffer: BufferVec::<SpriteInstance>::new(BufferUsages::VERTEX),
sprite_index_buffer: RawBufferVec::<u32>::new(BufferUsages::INDEX),
sprite_instance_buffer: RawBufferVec::<SpriteInstance>::new(BufferUsages::VERTEX),
}
}
}

View file

@ -834,16 +834,16 @@ struct UiVertex {
#[derive(Resource)]
pub struct UiMeta {
vertices: BufferVec<UiVertex>,
indices: BufferVec<u32>,
vertices: RawBufferVec<UiVertex>,
indices: RawBufferVec<u32>,
view_bind_group: Option<BindGroup>,
}
impl Default for UiMeta {
fn default() -> Self {
Self {
vertices: BufferVec::new(BufferUsages::VERTEX),
indices: BufferVec::new(BufferUsages::INDEX),
vertices: RawBufferVec::new(BufferUsages::VERTEX),
indices: RawBufferVec::new(BufferUsages::INDEX),
view_bind_group: None,
}
}

View file

@ -91,7 +91,7 @@ where
#[derive(Resource)]
pub struct UiMaterialMeta<M: UiMaterial> {
vertices: BufferVec<UiMaterialVertex>,
vertices: RawBufferVec<UiMaterialVertex>,
view_bind_group: Option<BindGroup>,
marker: PhantomData<M>,
}
@ -99,7 +99,7 @@ pub struct UiMaterialMeta<M: UiMaterial> {
impl<M: UiMaterial> Default for UiMaterialMeta<M> {
fn default() -> Self {
Self {
vertices: BufferVec::new(BufferUsages::VERTEX),
vertices: RawBufferVec::new(BufferUsages::VERTEX),
view_bind_group: Default::default(),
marker: PhantomData,
}