// bevy/crates/bevy_render/src/gpu_component_array_buffer.rs

use crate::{
    render_resource::{GpuArrayBuffer, GpuArrayBufferable},
    renderer::{RenderDevice, RenderQueue},
    Render, RenderApp, RenderSet,
};
use bevy_app::{App, Plugin};
use bevy_ecs::{
    prelude::{Component, Entity},
    schedule::IntoSystemConfigs,
    system::{Commands, Query, Res, ResMut},
};
use std::marker::PhantomData;

/// Plugin that gets every component of type `C` ready for the GPU by collecting
/// them into a [`GpuArrayBuffer`] resource.
///
/// The plugin only acts on the [`RenderApp`] sub-app; when that sub-app is not
/// present, adding the plugin has no effect.
pub struct GpuComponentArrayBufferPlugin<C>(PhantomData<C>)
where
    C: Component + GpuArrayBufferable;

impl<C: Component + GpuArrayBufferable> Plugin for GpuComponentArrayBufferPlugin<C> {
    /// Registers the buffer-filling system for `C` in the render sub-app's
    /// [`Render`] schedule, inside [`RenderSet::PrepareResources`].
    ///
    /// Does nothing when the [`RenderApp`] sub-app is unavailable.
    fn build(&self, app: &mut App) {
        let Ok(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        let prepare_system =
            prepare_gpu_component_array_buffers::<C>.in_set(RenderSet::PrepareResources);
        render_app.add_systems(Render, prepare_system);
    }

    /// Creates the [`GpuArrayBuffer`] for `C` from the render world's
    /// [`RenderDevice`] and stores it as a resource of the render sub-app.
    ///
    /// Does nothing when the [`RenderApp`] sub-app is unavailable.
    // NOTE(review): presumably this lives in `finish` rather than `build` because
    // the `RenderDevice` resource may not exist yet while plugins are being
    // built — confirm against the renderer initialization order.
    fn finish(&self, app: &mut App) {
        let Ok(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        let render_device = render_app.world.resource::<RenderDevice>();
        let buffer = GpuArrayBuffer::<C>::new(render_device);
        render_app.insert_resource(buffer);
    }
}

impl<C: Component + GpuArrayBufferable> Default for GpuComponentArrayBufferPlugin<C> {
    /// Builds the plugin; it carries no data beyond the `C` type marker.
    fn default() -> Self {
        // The marker's type parameter is inferred from `Self`.
        Self(PhantomData)
    }
}

/// Rebuilds the [`GpuArrayBuffer`] for `C` from all entities that currently have
/// a `C` component, then writes it out through the render device and queue.
///
/// The buffer is cleared first. A clone of every queried component is then
/// pushed into it, and the value returned by each `push` is inserted on the
/// entity the component came from via [`Commands::insert_or_spawn_batch`].
fn prepare_gpu_component_array_buffers<C: Component + GpuArrayBufferable>(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut gpu_array_buffer: ResMut<GpuArrayBuffer<C>>,
    components: Query<(Entity, &C)>,
) {
    // Start from an empty buffer so only the components seen in this run remain.
    gpu_array_buffer.clear();

    // Pair each entity with whatever `push` hands back for its component.
    // NOTE(review): presumably that value is the element's index/offset within
    // the buffer — confirm against `GpuArrayBuffer::push`.
    let mut per_entity = Vec::new();
    for (entity, component) in components.iter() {
        let pushed = gpu_array_buffer.push(component.clone());
        per_entity.push((entity, pushed));
    }
    commands.insert_or_spawn_batch(per_entity);

    // Write only after every component has been pushed.
    gpu_array_buffer.write_buffer(&render_device, &render_queue);
}
Add GpuArrayBuffer and BatchedUniformBuffer (#8204) # Objective - Add a type for uploading a Rust `Vec<T>` to a GPU `array<T>`. - Makes progress towards https://github.com/bevyengine/bevy/issues/89. ## Solution - Port @superdump's `BatchedUniformBuffer` to bevy main, as a fallback for WebGL2, which doesn't support storage buffers. - Rather than getting an `array<T>` in a shader, you get an `array<T, N>`, and have to rebind every N elements via dynamic offsets. - Add `GpuArrayBuffer` to abstract over `StorageBuffer<Vec<T>>`/`BatchedUniformBuffer`. ## Future Work Add a shader macro kinda thing to abstract over the following automatically: https://github.com/bevyengine/bevy/pull/8204#pullrequestreview-1396911727 --- ## Changelog * Added `GpuArrayBuffer`, `GpuComponentArrayBufferPlugin`, `GpuArrayBufferable`, and `GpuArrayBufferIndex` types. * Added `DynamicUniformBuffer::new_with_alignment()`. --------- Co-authored-by: Robert Swain <robert.swain@gmail.com> Co-authored-by: François <mockersf@gmail.com> Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com> Co-authored-by: IceSentry <IceSentry@users.noreply.github.com> Co-authored-by: Vincent <9408210+konsolas@users.noreply.github.com> Co-authored-by: robtfm <50659922+robtfm@users.noreply.github.com> 2023-07-21 16:46:56 +00:00			`use crate::{`
			`render_resource::{GpuArrayBuffer, GpuArrayBufferable},`
			`renderer::{RenderDevice, RenderQueue},`
			`Render, RenderApp, RenderSet,`
			`};`
			`use bevy_app::{App, Plugin};`
			`use bevy_ecs::{`
			`prelude::{Component, Entity},`
			`schedule::IntoSystemConfigs,`
			`system::{Commands, Query, Res, ResMut},`
			`};`
			`use std::marker::PhantomData;`

			`/// This plugin prepares the components of the corresponding type for the GPU`
			/// by storing them in a [`GpuArrayBuffer`].
			`pub struct GpuComponentArrayBufferPlugin<C: Component + GpuArrayBufferable>(PhantomData<C>);`

			`impl<C: Component + GpuArrayBufferable> Plugin for GpuComponentArrayBufferPlugin<C> {`
			`fn build(&self, app: &mut App) {`
			`if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {`
Use GpuArrayBuffer for MeshUniform (#9254) # Objective - Reduce the number of rebindings to enable batching of draw commands ## Solution - Use the new `GpuArrayBuffer` for `MeshUniform` data to store all `MeshUniform` data in arrays within fewer bindings - Sort opaque/alpha mask prepass, opaque/alpha mask main, and shadow phases also by the batch per-object data binding dynamic offset to improve performance on WebGL2. --- ## Changelog - Changed: Per-object `MeshUniform` data is now managed by `GpuArrayBuffer` as arrays in buffers that need to be indexed into. ## Migration Guide Accessing the `model` member of an individual mesh object's shader `Mesh` struct the old way where each `MeshUniform` was stored at its own dynamic offset: ```rust struct Vertex { @location(0) position: vec3<f32>, }; fn vertex(vertex: Vertex) -> VertexOutput { var out: VertexOutput; out.clip_position = mesh_position_local_to_clip( mesh.model, vec4<f32>(vertex.position, 1.0) ); return out; } ``` The new way where one needs to index into the array of `Mesh`es for the batch: ```rust struct Vertex { @builtin(instance_index) instance_index: u32, @location(0) position: vec3<f32>, }; fn vertex(vertex: Vertex) -> VertexOutput { var out: VertexOutput; out.clip_position = mesh_position_local_to_clip( mesh[vertex.instance_index].model, vec4<f32>(vertex.position, 1.0) ); return out; } ``` Note that using the instance_index is the default way to pass the per-object index into the shader, but if you wish to do custom rendering approaches you can pass it in however you like. --------- Co-authored-by: robtfm <50659922+robtfm@users.noreply.github.com> Co-authored-by: Elabajaba <Elabajaba@users.noreply.github.com> 2023-07-30 13:17:08 +00:00			`render_app.add_systems(`
			`Render,`
Reorder render sets, refactor bevy_sprite to take advantage (#9236) This is a continuation of this PR: #8062 # Objective - Reorder render schedule sets to allow data preparation when phase item order is known to support improved batching - Part of the batching/instancing etc plan from here: https://github.com/bevyengine/bevy/issues/89#issuecomment-1379249074 - The original idea came from @inodentry and proved to be a good one. Thanks! - Refactor `bevy_sprite` and `bevy_ui` to take advantage of the new ordering ## Solution - Move `Prepare` and `PrepareFlush` after `PhaseSortFlush` - Add a `PrepareAssets` set that runs in parallel with other systems and sets in the render schedule. - Put prepare_assets systems in the `PrepareAssets` set - If explicit dependencies are needed on Mesh or Material RenderAssets then depend on the appropriate system. - Add `ManageViews` and `ManageViewsFlush` sets between `ExtractCommands` and Queue - Move `queue_mesh*_bind_group` to the Prepare stage - Rename them to `prepare_` - Put systems that prepare resources (buffers, textures, etc.) into a `PrepareResources` set inside `Prepare` - Put the `prepare_..._bind_group` systems into a `PrepareBindGroup` set after `PrepareResources` - Move `prepare_lights` to the `ManageViews` set - `prepare_lights` creates views and this must happen before `Queue` - This system needs refactoring to stop handling all responsibilities - Gather lights, sort, and create shadow map views. Store sorted light entities in a resource - Remove `BatchedPhaseItem` - Replace `batch_range` with `batch_size` representing how many items to skip after rendering the item or to skip the item entirely if `batch_size` is 0. 
- `queue_sprites` has been split into `queue_sprites` for queueing phase items and `prepare_sprites` for batching after the `PhaseSort` - `PhaseItem`s are still inserted in `queue_sprites` - After sorting adjacent compatible sprite phase items are accumulated into `SpriteBatch` components on the first entity of each batch, containing a range of vertex indices. The associated `PhaseItem`'s `batch_size` is updated appropriately. - `SpriteBatch` items are then drawn skipping over the other items in the batch based on the value in `batch_size` - A very similar refactor was performed on `bevy_ui` --- ## Changelog Changed: - Reordered and reworked render app schedule sets. The main change is that data is extracted, queued, sorted, and then prepared when the order of data is known. - Refactor `bevy_sprite` and `bevy_ui` to take advantage of the reordering. ## Migration Guide - Assets such as materials and meshes should now be created in `PrepareAssets` e.g. `prepare_assets<Mesh>` - Queueing entities to `RenderPhase`s continues to be done in `Queue` e.g. `queue_sprites` - Preparing resources (textures, buffers, etc.) should now be done in `PrepareResources`, e.g. `prepare_prepass_textures`, `prepare_mesh_uniforms` - Prepare bind groups should now be done in `PrepareBindGroups` e.g. `prepare_mesh_bind_group` - Any batching or instancing can now be done in `Prepare` where the order of the phase items is known e.g. `prepare_sprites` ## Next Steps - Introduce some generic mechanism to ensure items that can be batched are grouped in the phase item order, currently you could easily have `[sprite at z 0, mesh at z 0, sprite at z 0]` preventing batching. 
- Investigate improved orderings for building the MeshUniform buffer - Implementing batching across the rest of bevy --------- Co-authored-by: Robert Swain <robert.swain@gmail.com> Co-authored-by: robtfm <50659922+robtfm@users.noreply.github.com> 2023-08-27 14:33:49 +00:00			`prepare_gpu_component_array_buffers::<C>.in_set(RenderSet::PrepareResources),`
Use GpuArrayBuffer for MeshUniform (#9254) # Objective - Reduce the number of rebindings to enable batching of draw commands ## Solution - Use the new `GpuArrayBuffer` for `MeshUniform` data to store all `MeshUniform` data in arrays within fewer bindings - Sort opaque/alpha mask prepass, opaque/alpha mask main, and shadow phases also by the batch per-object data binding dynamic offset to improve performance on WebGL2. --- ## Changelog - Changed: Per-object `MeshUniform` data is now managed by `GpuArrayBuffer` as arrays in buffers that need to be indexed into. ## Migration Guide Accessing the `model` member of an individual mesh object's shader `Mesh` struct the old way where each `MeshUniform` was stored at its own dynamic offset: ```rust struct Vertex { @location(0) position: vec3<f32>, }; fn vertex(vertex: Vertex) -> VertexOutput { var out: VertexOutput; out.clip_position = mesh_position_local_to_clip( mesh.model, vec4<f32>(vertex.position, 1.0) ); return out; } ``` The new way where one needs to index into the array of `Mesh`es for the batch: ```rust struct Vertex { @builtin(instance_index) instance_index: u32, @location(0) position: vec3<f32>, }; fn vertex(vertex: Vertex) -> VertexOutput { var out: VertexOutput; out.clip_position = mesh_position_local_to_clip( mesh[vertex.instance_index].model, vec4<f32>(vertex.position, 1.0) ); return out; } ``` Note that using the instance_index is the default way to pass the per-object index into the shader, but if you wish to do custom rendering approaches you can pass it in however you like. --------- Co-authored-by: robtfm <50659922+robtfm@users.noreply.github.com> Co-authored-by: Elabajaba <Elabajaba@users.noreply.github.com> 2023-07-30 13:17:08 +00:00			`);`
			`}`
			`}`

			`fn finish(&self, app: &mut App) {`
			`if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {`
			`render_app.insert_resource(GpuArrayBuffer::<C>::new(`
			`render_app.world.resource::<RenderDevice>(),`
			`));`
Add GpuArrayBuffer and BatchedUniformBuffer (#8204) # Objective - Add a type for uploading a Rust `Vec<T>` to a GPU `array<T>`. - Makes progress towards https://github.com/bevyengine/bevy/issues/89. ## Solution - Port @superdump's `BatchedUniformBuffer` to bevy main, as a fallback for WebGL2, which doesn't support storage buffers. - Rather than getting an `array<T>` in a shader, you get an `array<T, N>`, and have to rebind every N elements via dynamic offsets. - Add `GpuArrayBuffer` to abstract over `StorageBuffer<Vec<T>>`/`BatchedUniformBuffer`. ## Future Work Add a shader macro kinda thing to abstract over the following automatically: https://github.com/bevyengine/bevy/pull/8204#pullrequestreview-1396911727 --- ## Changelog * Added `GpuArrayBuffer`, `GpuComponentArrayBufferPlugin`, `GpuArrayBufferable`, and `GpuArrayBufferIndex` types. * Added `DynamicUniformBuffer::new_with_alignment()`. --------- Co-authored-by: Robert Swain <robert.swain@gmail.com> Co-authored-by: François <mockersf@gmail.com> Co-authored-by: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com> Co-authored-by: IceSentry <IceSentry@users.noreply.github.com> Co-authored-by: Vincent <9408210+konsolas@users.noreply.github.com> Co-authored-by: robtfm <50659922+robtfm@users.noreply.github.com> 2023-07-21 16:46:56 +00:00			`}`
			`}`
			`}`

			`impl<C: Component + GpuArrayBufferable> Default for GpuComponentArrayBufferPlugin<C> {`
			`fn default() -> Self {`
			`Self(PhantomData::<C>)`
			`}`
			`}`

			`fn prepare_gpu_component_array_buffers<C: Component + GpuArrayBufferable>(`
			`mut commands: Commands,`
			`render_device: Res<RenderDevice>,`
			`render_queue: Res<RenderQueue>,`
			`mut gpu_array_buffer: ResMut<GpuArrayBuffer<C>>,`
			`components: Query<(Entity, &C)>,`
			`) {`
			`gpu_array_buffer.clear();`

			`let entities = components`
			`.iter()`
			`.map(\|(entity, component)\| (entity, gpu_array_buffer.push(component.clone())))`
			`.collect::<Vec<_>>();`
			`commands.insert_or_spawn_batch(entities);`

			`gpu_array_buffer.write_buffer(&render_device, &render_queue);`
			`}`