Allow Mesh-related queue phase systems to parallelize (#11804)

# Objective
Partially addresses #3548. `queue_shadows` and `queue_material_meshes`
cannot parallelize because of the `ResMut<RenderMeshInstances>`
parameter for `queue_material_meshes`.

## Solution
Change the `material_bind_group` field to use atomics instead of needing
full mutable access. Change the `ResMut` to a `Res`, which should allow
both sets of systems to parallelize without issue.

## Performance
Tested against `many_foxes`, this has a significant improvement over the
entire render schedule. (Yellow is this PR, red is main)

![image](https://github.com/bevyengine/bevy/assets/3137680/6cc7f346-4f50-4f12-a383-682a9ce1daf6)

The use of atomics does seem to have a negative effect on
`queue_material_meshes` (roughly a 8.29% increase in time spent in the
system).

![image](https://github.com/bevyengine/bevy/assets/3137680/7907079a-863d-4760-aa5b-df68c006ea36)

`queue_shadows` seems to be ever so slightly slower (1.6% more time
spent) in the system.

![image](https://github.com/bevyengine/bevy/assets/3137680/6d90af73-b922-45e4-bae5-df200e8b9784)

`batch_and_prepare_render_phase` seems to be a mix, but overall seems to
be slightly *faster* by about 5%.

![image](https://github.com/bevyengine/bevy/assets/3137680/fac638ff-8c90-436b-9362-c6209b18957c)
This commit is contained in:
James Liu 2024-02-19 16:12:41 -08:00 committed by GitHub
parent dedf66f72b
commit 6d547d7ce6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 61 additions and 17 deletions

View file

@ -31,8 +31,9 @@ use bevy_render::{
Extract, ExtractSchedule, Render, RenderApp, RenderSet,
};
use bevy_utils::{tracing::error, HashMap, HashSet};
use std::hash::Hash;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicU32, Ordering};
use std::{hash::Hash, num::NonZeroU32};
use self::{irradiance_volume::IrradianceVolume, prelude::EnvironmentMapLight};
@ -236,9 +237,7 @@ where
.after(prepare_materials::<M>),
queue_material_meshes::<M>
.in_set(RenderSet::QueueMeshes)
.after(prepare_materials::<M>)
// queue_material_meshes only writes to `material_bind_group_id`, which `queue_shadows` doesn't read
.ambiguous_with(render::queue_shadows::<M>),
.after(prepare_materials::<M>),
),
);
}
@ -466,7 +465,7 @@ pub fn queue_material_meshes<M: Material>(
msaa: Res<Msaa>,
render_meshes: Res<RenderAssets<Mesh>>,
render_materials: Res<RenderMaterials<M>>,
mut render_mesh_instances: ResMut<RenderMeshInstances>,
render_mesh_instances: Res<RenderMeshInstances>,
render_material_instances: Res<RenderMaterialInstances<M>>,
render_lightmaps: Res<RenderLightmaps>,
mut views: Query<(
@ -592,7 +591,7 @@ pub fn queue_material_meshes<M: Material>(
let Some(material_asset_id) = render_material_instances.get(visible_entity) else {
continue;
};
let Some(mesh_instance) = render_mesh_instances.get_mut(visible_entity) else {
let Some(mesh_instance) = render_mesh_instances.get(visible_entity) else {
continue;
};
let Some(mesh) = render_meshes.get(mesh_instance.mesh_asset_id) else {
@ -646,7 +645,9 @@ pub fn queue_material_meshes<M: Material>(
}
};
mesh_instance.material_bind_group_id = material.get_bind_group_id();
mesh_instance
.material_bind_group_id
.set(material.get_bind_group_id());
match material.properties.alpha_mode {
AlphaMode::Opaque => {
@ -795,6 +796,34 @@ pub struct PreparedMaterial<T: Material> {
#[derive(Component, Clone, Copy, Default, PartialEq, Eq, Deref, DerefMut)]
pub struct MaterialBindGroupId(Option<BindGroupId>);
/// An atomic version of [`MaterialBindGroupId`] that can be read from and written to
/// safely from multiple threads.
#[derive(Default)]
pub struct AtomicMaterialBindGroupId(AtomicU32);
impl AtomicMaterialBindGroupId {
/// Stores a value atomically. Uses [`Ordering::Relaxed`] so there is zero guarentee of ordering
/// relative to other operations.
///
/// See also: [`AtomicU32::store`].
pub fn set(&self, id: MaterialBindGroupId) {
let id = if let Some(id) = id.0 {
NonZeroU32::from(id).get()
} else {
0
};
self.0.store(id, Ordering::Relaxed);
}
/// Loads a value atomically. Uses [`Ordering::Relaxed`] so there is zero guarentee of ordering
/// relative to other operations.
///
/// See also: [`AtomicU32::load`].
pub fn get(&self) -> MaterialBindGroupId {
MaterialBindGroupId(NonZeroU32::new(self.0.load(Ordering::Relaxed)).map(BindGroupId::from))
}
}
impl<T: Material> PreparedMaterial<T> {
pub fn get_bind_group_id(&self) -> MaterialBindGroupId {
MaterialBindGroupId(Some(self.bind_group.id()))

View file

@ -1591,7 +1591,7 @@ pub fn queue_shadows<M: Material>(
shadow_draw_functions: Res<DrawFunctions<Shadow>>,
prepass_pipeline: Res<PrepassPipeline<M>>,
render_meshes: Res<RenderAssets<Mesh>>,
mut render_mesh_instances: ResMut<RenderMeshInstances>,
render_mesh_instances: Res<RenderMeshInstances>,
render_materials: Res<RenderMaterials<M>>,
render_material_instances: Res<RenderMaterialInstances<M>>,
mut pipelines: ResMut<SpecializedMeshPipelines<PrepassPipeline<M>>>,
@ -1637,7 +1637,7 @@ pub fn queue_shadows<M: Material>(
// NOTE: Lights with shadow mapping disabled will have no visible entities
// so no meshes will be queued
for entity in visible_entities.iter().copied() {
let Some(mesh_instance) = render_mesh_instances.get_mut(&entity) else {
let Some(mesh_instance) = render_mesh_instances.get(&entity) else {
continue;
};
if !mesh_instance.shadow_caster {
@ -1697,7 +1697,9 @@ pub fn queue_shadows<M: Material>(
}
};
mesh_instance.material_bind_group_id = material.get_bind_group_id();
mesh_instance
.material_bind_group_id
.set(material.get_bind_group_id());
shadow_phase.add(Shadow {
draw_function: draw_shadow_mesh,

View file

@ -1,7 +1,8 @@
use crate::{
MaterialBindGroupId, NotShadowCaster, NotShadowReceiver, PreviousGlobalTransform, Shadow,
ViewFogUniformOffset, ViewLightProbesUniformOffset, ViewLightsUniformOffset,
CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT, MAX_DIRECTIONAL_LIGHTS,
AtomicMaterialBindGroupId, MaterialBindGroupId, NotShadowCaster, NotShadowReceiver,
PreviousGlobalTransform, Shadow, ViewFogUniformOffset, ViewLightProbesUniformOffset,
ViewLightsUniformOffset, CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT,
MAX_DIRECTIONAL_LIGHTS,
};
use bevy_app::{Plugin, PostUpdate};
use bevy_asset::{load_internal_asset, AssetId, Handle};
@ -251,14 +252,14 @@ bitflags::bitflags! {
pub struct RenderMeshInstance {
pub transforms: MeshTransforms,
pub mesh_asset_id: AssetId<Mesh>,
pub material_bind_group_id: MaterialBindGroupId,
pub material_bind_group_id: AtomicMaterialBindGroupId,
pub shadow_caster: bool,
pub automatic_batching: bool,
}
impl RenderMeshInstance {
pub fn should_batch(&self) -> bool {
self.automatic_batching && self.material_bind_group_id.is_some()
self.automatic_batching && self.material_bind_group_id.get().is_some()
}
}
@ -323,7 +324,7 @@ pub fn extract_meshes(
mesh_asset_id: handle.id(),
transforms,
shadow_caster: !not_shadow_caster,
material_bind_group_id: MaterialBindGroupId::default(),
material_bind_group_id: AtomicMaterialBindGroupId::default(),
automatic_batching: !no_automatic_batching,
},
));
@ -475,7 +476,7 @@ impl GetBatchData for MeshPipeline {
maybe_lightmap.map(|lightmap| lightmap.uv_rect),
),
mesh_instance.should_batch().then_some((
mesh_instance.material_bind_group_id,
mesh_instance.material_bind_group_id.get(),
mesh_instance.mesh_asset_id,
maybe_lightmap.map(|lightmap| lightmap.image),
)),

View file

@ -136,6 +136,18 @@ macro_rules! define_atomic_id {
}))
}
}
impl From<$atomic_id_type> for core::num::NonZeroU32 {
fn from(value: $atomic_id_type) -> Self {
value.0
}
}
impl From<core::num::NonZeroU32> for $atomic_id_type {
fn from(value: core::num::NonZeroU32) -> Self {
Self(value)
}
}
};
}