Throttle render assets (#12622)

# Objective

Allow throttling of GPU uploads to prevent a choppy framerate when many
textures/meshes are loaded at once.

## Solution

- `RenderAsset`s can implement `byte_len()`, which reports their size.
This is implemented for `Mesh` and `Image`.
- Users can add a `RenderAssetBytesPerFrame` resource, which specifies the max bytes
to attempt to upload in a frame.
- `prepare_assets::<A>` checks how many bytes have been written before
attempting to upload assets. The limit is a soft cap: assets will be
written until the total has exceeded the cap, to ensure some forward
progress every frame.

notes:
- this is a stopgap until we have multiple wgpu queues for proper
streaming of data
- requires #12606

issues
- ~~Fonts sometimes only partially upload. I have no clue why, needs to
be fixed~~ Fixed now.
- Choosing the number of bytes is tricky, as it should be hardware- and
framerate-dependent.
- Many features are not tested (env maps, light probes, etc.) - they
won't break unless `RenderAssetBytesPerFrame` is explicitly used, though.

---------

Co-authored-by: IceSentry <IceSentry@users.noreply.github.com>
Co-authored-by: François Mockers <francois.mockers@vleue.com>
This commit is contained in:
robtfm 2024-04-27 00:43:33 +01:00 committed by GitHub
parent 9c38844fc8
commit 91a393a9e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 149 additions and 7 deletions

View file

@ -58,7 +58,9 @@ pub use extract_param::Extract;
use bevy_hierarchy::ValidParentCheckPlugin; use bevy_hierarchy::ValidParentCheckPlugin;
use bevy_window::{PrimaryWindow, RawHandleWrapper}; use bevy_window::{PrimaryWindow, RawHandleWrapper};
use extract_resource::ExtractResourcePlugin;
use globals::GlobalsPlugin; use globals::GlobalsPlugin;
use render_asset::RenderAssetBytesPerFrame;
use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue}; use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue};
use crate::mesh::GpuMesh; use crate::mesh::GpuMesh;
@ -334,6 +336,9 @@ impl Plugin for RenderPlugin {
MorphPlugin, MorphPlugin,
)); ));
app.init_resource::<RenderAssetBytesPerFrame>()
.add_plugins(ExtractResourcePlugin::<RenderAssetBytesPerFrame>::default());
app.register_type::<alpha::AlphaMode>() app.register_type::<alpha::AlphaMode>()
// These types cannot be registered in bevy_color, as it does not depend on the rest of Bevy // These types cannot be registered in bevy_color, as it does not depend on the rest of Bevy
.register_type::<bevy_color::Color>() .register_type::<bevy_color::Color>()
@ -375,7 +380,14 @@ impl Plugin for RenderPlugin {
.insert_resource(device) .insert_resource(device)
.insert_resource(queue) .insert_resource(queue)
.insert_resource(render_adapter) .insert_resource(render_adapter)
.insert_resource(adapter_info); .insert_resource(adapter_info)
.add_systems(
Render,
(|mut bpf: ResMut<RenderAssetBytesPerFrame>| {
bpf.reset();
})
.in_set(RenderSet::Cleanup),
);
} }
} }
} }

View file

@ -1477,6 +1477,18 @@ impl RenderAsset for GpuMesh {
mesh.asset_usage mesh.asset_usage
} }
/// Estimates how much data this mesh will upload to the GPU: the summed size of
/// every vertex attribute's format multiplied by the vertex count, plus the
/// length of the index buffer when one is present.
fn byte_len(mesh: &Self::SourceAsset) -> Option<usize> {
    // Size of a single vertex across all of its attributes.
    let bytes_per_vertex: usize = mesh
        .attributes
        .values()
        .map(|data| data.attribute.format.get_size() as usize)
        .sum();
    let vertices = mesh.count_vertices();
    // Meshes without an index buffer contribute no index bytes.
    let index_len = mesh.get_index_buffer_bytes().map_or(0, <[_]>::len);
    Some(bytes_per_vertex * vertices + index_len)
}
/// Converts the extracted mesh into a [`GpuMesh`]. /// Converts the extracted mesh into a [`GpuMesh`].
fn prepare_asset( fn prepare_asset(
mesh: Self::SourceAsset, mesh: Self::SourceAsset,

View file

@ -8,7 +8,8 @@ use bevy_ecs::{
world::{FromWorld, Mut}, world::{FromWorld, Mut},
}; };
use bevy_reflect::{Reflect, ReflectDeserialize, ReflectSerialize}; use bevy_reflect::{Reflect, ReflectDeserialize, ReflectSerialize};
use bevy_utils::{HashMap, HashSet}; use bevy_render_macros::ExtractResource;
use bevy_utils::{tracing::debug, HashMap, HashSet};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::marker::PhantomData; use std::marker::PhantomData;
use thiserror::Error; use thiserror::Error;
@ -41,6 +42,14 @@ pub trait RenderAsset: Send + Sync + 'static + Sized {
RenderAssetUsages::default() RenderAssetUsages::default()
} }
/// Size of the data the asset will upload to the GPU.
///
/// Returning `Some(len)` allows the asset to be throttled via
/// [`RenderAssetBytesPerFrame`]. The default implementation returns `None`;
/// `prepare_assets` counts such assets as zero bytes and never defers them
/// because of the byte budget.
#[inline]
// The default implementation ignores its argument, hence the lint allowance.
#[allow(unused_variables)]
fn byte_len(source_asset: &Self::SourceAsset) -> Option<usize> {
None
}
/// Prepares the [`RenderAsset::SourceAsset`] for the GPU by transforming it into a [`RenderAsset`]. /// Prepares the [`RenderAsset::SourceAsset`] for the GPU by transforming it into a [`RenderAsset`].
/// ///
/// ECS data may be accessed via `param`. /// ECS data may be accessed via `param`.
@ -160,7 +169,8 @@ impl<A: RenderAsset> RenderAssetDependency for A {
#[derive(Resource)] #[derive(Resource)]
pub struct ExtractedAssets<A: RenderAsset> { pub struct ExtractedAssets<A: RenderAsset> {
extracted: Vec<(AssetId<A::SourceAsset>, A::SourceAsset)>, extracted: Vec<(AssetId<A::SourceAsset>, A::SourceAsset)>,
removed: Vec<AssetId<A::SourceAsset>>, removed: HashSet<AssetId<A::SourceAsset>>,
added: HashSet<AssetId<A::SourceAsset>>,
} }
impl<A: RenderAsset> Default for ExtractedAssets<A> { impl<A: RenderAsset> Default for ExtractedAssets<A> {
@ -168,6 +178,7 @@ impl<A: RenderAsset> Default for ExtractedAssets<A> {
Self { Self {
extracted: Default::default(), extracted: Default::default(),
removed: Default::default(), removed: Default::default(),
added: Default::default(),
} }
} }
} }
@ -233,7 +244,7 @@ fn extract_render_asset<A: RenderAsset>(mut commands: Commands, mut main_world:
let (mut events, mut assets) = cached_state.state.get_mut(world); let (mut events, mut assets) = cached_state.state.get_mut(world);
let mut changed_assets = HashSet::default(); let mut changed_assets = HashSet::default();
let mut removed = Vec::new(); let mut removed = HashSet::default();
for event in events.read() { for event in events.read() {
#[allow(clippy::match_same_arms)] #[allow(clippy::match_same_arms)]
@ -244,7 +255,7 @@ fn extract_render_asset<A: RenderAsset>(mut commands: Commands, mut main_world:
AssetEvent::Removed { .. } => {} AssetEvent::Removed { .. } => {}
AssetEvent::Unused { id } => { AssetEvent::Unused { id } => {
changed_assets.remove(id); changed_assets.remove(id);
removed.push(*id); removed.insert(*id);
} }
AssetEvent::LoadedWithDependencies { .. } => { AssetEvent::LoadedWithDependencies { .. } => {
// TODO: handle this // TODO: handle this
@ -253,6 +264,7 @@ fn extract_render_asset<A: RenderAsset>(mut commands: Commands, mut main_world:
} }
let mut extracted_assets = Vec::new(); let mut extracted_assets = Vec::new();
let mut added = HashSet::new();
for id in changed_assets.drain() { for id in changed_assets.drain() {
if let Some(asset) = assets.get(id) { if let Some(asset) = assets.get(id) {
let asset_usage = A::asset_usage(asset); let asset_usage = A::asset_usage(asset);
@ -260,9 +272,11 @@ fn extract_render_asset<A: RenderAsset>(mut commands: Commands, mut main_world:
if asset_usage == RenderAssetUsages::RENDER_WORLD { if asset_usage == RenderAssetUsages::RENDER_WORLD {
if let Some(asset) = assets.remove(id) { if let Some(asset) = assets.remove(id) {
extracted_assets.push((id, asset)); extracted_assets.push((id, asset));
added.insert(id);
} }
} else { } else {
extracted_assets.push((id, asset.clone())); extracted_assets.push((id, asset.clone()));
added.insert(id);
} }
} }
} }
@ -271,6 +285,7 @@ fn extract_render_asset<A: RenderAsset>(mut commands: Commands, mut main_world:
commands.insert_resource(ExtractedAssets::<A> { commands.insert_resource(ExtractedAssets::<A> {
extracted: extracted_assets, extracted: extracted_assets,
removed, removed,
added,
}); });
cached_state.state.apply(world); cached_state.state.apply(world);
}, },
@ -299,17 +314,37 @@ pub fn prepare_assets<A: RenderAsset>(
mut render_assets: ResMut<RenderAssets<A>>, mut render_assets: ResMut<RenderAssets<A>>,
mut prepare_next_frame: ResMut<PrepareNextFrameAssets<A>>, mut prepare_next_frame: ResMut<PrepareNextFrameAssets<A>>,
param: StaticSystemParam<<A as RenderAsset>::Param>, param: StaticSystemParam<<A as RenderAsset>::Param>,
mut bpf: ResMut<RenderAssetBytesPerFrame>,
) { ) {
let mut wrote_asset_count = 0;
let mut param = param.into_inner(); let mut param = param.into_inner();
let queued_assets = std::mem::take(&mut prepare_next_frame.assets); let queued_assets = std::mem::take(&mut prepare_next_frame.assets);
for (id, extracted_asset) in queued_assets { for (id, extracted_asset) in queued_assets {
if extracted_assets.removed.contains(&id) { if extracted_assets.removed.contains(&id) || extracted_assets.added.contains(&id) {
// skip previous frame's assets that have been removed or updated
continue; continue;
} }
let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) {
// we could check if available bytes > byte_len here, but we want to make some
// forward progress even if the asset is larger than the max bytes per frame.
// this way we always write at least one (sized) asset per frame.
// in future we could also consider partial asset uploads.
if bpf.exhausted() {
prepare_next_frame.assets.push((id, extracted_asset));
continue;
}
size
} else {
0
};
match A::prepare_asset(extracted_asset, &mut param) { match A::prepare_asset(extracted_asset, &mut param) {
Ok(prepared_asset) => { Ok(prepared_asset) => {
render_assets.insert(id, prepared_asset); render_assets.insert(id, prepared_asset);
bpf.write_bytes(write_bytes);
wrote_asset_count += 1;
} }
Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => { Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => {
prepare_next_frame.assets.push((id, extracted_asset)); prepare_next_frame.assets.push((id, extracted_asset));
@ -317,18 +352,96 @@ pub fn prepare_assets<A: RenderAsset>(
} }
} }
for removed in extracted_assets.removed.drain(..) { for removed in extracted_assets.removed.drain() {
render_assets.remove(removed); render_assets.remove(removed);
} }
for (id, extracted_asset) in extracted_assets.extracted.drain(..) { for (id, extracted_asset) in extracted_assets.extracted.drain(..) {
// we remove previous here to ensure that if we are updating the asset then
// any users will not see the old asset after a new asset is extracted,
// even if the new asset is not yet ready or we are out of bytes to write.
render_assets.remove(id);
let write_bytes = if let Some(size) = A::byte_len(&extracted_asset) {
if bpf.exhausted() {
prepare_next_frame.assets.push((id, extracted_asset));
continue;
}
size
} else {
0
};
match A::prepare_asset(extracted_asset, &mut param) { match A::prepare_asset(extracted_asset, &mut param) {
Ok(prepared_asset) => { Ok(prepared_asset) => {
render_assets.insert(id, prepared_asset); render_assets.insert(id, prepared_asset);
bpf.write_bytes(write_bytes);
wrote_asset_count += 1;
} }
Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => { Err(PrepareAssetError::RetryNextUpdate(extracted_asset)) => {
prepare_next_frame.assets.push((id, extracted_asset)); prepare_next_frame.assets.push((id, extracted_asset));
} }
} }
} }
if bpf.exhausted() && !prepare_next_frame.assets.is_empty() {
debug!(
"{} write budget exhausted with {} assets remaining (wrote {})",
std::any::type_name::<A>(),
prepare_next_frame.assets.len(),
wrote_asset_count
);
}
}
/// A resource that attempts to limit the amount of data transferred from CPU to GPU
/// each frame, preventing choppy frames at the cost of waiting longer for GPU assets
/// to become available.
#[derive(Resource, Default, Debug, Clone, Copy, ExtractResource)]
pub struct RenderAssetBytesPerFrame {
/// Soft cap on the bytes written per frame. `None` means no limit is applied.
pub max_bytes: Option<usize>,
/// Bytes remaining in the current budget. Refilled by `reset` and reduced
/// by each recorded write.
pub available: usize,
}
impl RenderAssetBytesPerFrame {
/// Creates a limiter that aims to write at most `max_bytes` bytes per frame.
///
/// This is a soft limit: only full assets are written currently, so uploading
/// stops after the first asset that exceeds the limit.
/// To participate, assets should implement [`RenderAsset::byte_len`]. If the default
/// is not overridden, the assets are assumed to be small enough to upload without
/// restriction.
///
/// The budget starts out empty and is filled by the next call to `reset`.
pub fn new(max_bytes: usize) -> Self {
    Self {
        available: 0,
        max_bytes: Some(max_bytes),
    }
}
/// Refills the budget: `available` becomes `max_bytes`, or `usize::MAX` when no
/// limit is set. Called once per frame by the [`crate::RenderPlugin`].
pub fn reset(&mut self) {
    self.available = match self.max_bytes {
        Some(limit) => limit,
        None => usize::MAX,
    };
}
/// Returns how many of `required_bytes` fit in the budget left since the last
/// reset. When no limit is configured, all of `required_bytes` is returned.
pub fn available_bytes(&self, required_bytes: usize) -> usize {
    match self.max_bytes {
        Some(_) => required_bytes.min(self.available),
        None => required_bytes,
    }
}
/// Deducts `bytes` from the budget for the current frame, stopping at zero.
/// Does nothing when no limit is configured.
fn write_bytes(&mut self, bytes: usize) {
    if self.max_bytes.is_some() {
        // Saturating: a write larger than the remaining budget empties it.
        self.available = self.available.saturating_sub(bytes);
    }
}
/// Returns `true` when a limit is configured and no bytes remain for this frame.
///
/// Without a limit (`max_bytes` is `None`) the budget is never exhausted. This
/// matters before the first `reset`: `available` starts at zero, and treating
/// that as exhausted would defer every sized asset even though no throttling
/// was requested.
fn exhausted(&self) -> bool {
    self.max_bytes.is_some() && self.available == 0
}
} }

View file

@ -839,6 +839,11 @@ impl RenderAsset for GpuImage {
image.asset_usage image.asset_usage
} }
/// Reports the length of `image.data` as the size used for upload throttling.
#[inline]
fn byte_len(image: &Self::SourceAsset) -> Option<usize> {
Some(image.data.len())
}
/// Converts the extracted image into a [`GpuImage`]. /// Converts the extracted image into a [`GpuImage`].
fn prepare_asset( fn prepare_asset(
image: Self::SourceAsset, image: Self::SourceAsset,