Move skin code to a separate module (#9899)

# Objective

`mesh.rs` is infamously large. We can split off unrelated code to make it more manageable.

## Solution

Morph targets are very similar to skinning and already have their own module. We
move the skinned mesh code into an independent module, mirroring the morph target
module, and give its systems similar names.
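
For reference, here is a rough sketch of the resulting layout of `bevy_pbr`'s `render` module. The module and item names come from the diff below; this is not a complete file, just an illustration of the arrangement:

```rust
// Inside bevy_pbr's `render` module: skinning now sits beside the morph module.
mod morph; // morph target systems (already existed, used as the model for this split)
mod skin;  // new module holding the skinned mesh systems

// The moved items remain reachable from the parent module:
pub use skin::{extract_skins, prepare_skins, SkinIndex, SkinUniform, MAX_JOINTS};
```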

### Open questions

Should the skinning systems and structs stay public?

---

## Migration Guide

Renamed skinning systems, resources, and components; a short before/after sketch follows the list:

- `extract_skinned_meshes` -> `extract_skins`
- `prepare_skinned_meshes` -> `prepare_skins`
- `SkinnedMeshUniform` -> `SkinUniform`
- `SkinnedMeshJoints` -> `SkinIndex`
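
For illustration, a minimal before/after sketch of user code affected by the rename. It assumes these items are still re-exported from `bevy_pbr` as before; `debug_skins` is a hypothetical system, not part of this PR:

```rust
use bevy_ecs::prelude::{Query, Res};
// Old paths were `bevy_pbr::SkinnedMeshUniform` and `bevy_pbr::SkinnedMeshJoints`.
use bevy_pbr::{SkinIndex, SkinUniform};

// Before: fn debug_skins(uniform: Res<SkinnedMeshUniform>, skins: Query<&SkinnedMeshJoints>)
// After:
fn debug_skins(uniform: Res<SkinUniform>, skins: Query<&SkinIndex>) {
    // `SkinUniform` still wraps the joint matrix buffer...
    println!("queued joint matrices: {}", uniform.buffer.len());
    // ...and each `SkinIndex` still stores the byte offset of its entity's joints.
    for skin in &skins {
        println!("skin starts at byte offset {}", skin.index);
    }
}
```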

---------

Co-authored-by: François <mockersf@gmail.com>
Co-authored-by: vero <email@atlasdostal.com>
Author: Nicola Papale, 2023-09-25 20:40:22 +02:00 (committed by GitHub)
Commit: db1e3d36bc (parent: ae95ba5278)
5 changed files with 166 additions and 147 deletions

@@ -6,7 +6,7 @@ use crate::{
CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT, MAX_DIRECTIONAL_LIGHTS,
};
use bevy_app::Plugin;
use bevy_asset::{load_internal_asset, AssetId, Assets, Handle};
use bevy_asset::{load_internal_asset, AssetId, Handle};
use bevy_core_pipeline::{
core_3d::{AlphaMask3d, Opaque3d, Transparent3d},
prepass::ViewPrepassTextures,
@@ -19,15 +19,11 @@ use bevy_ecs::{
query::{QueryItem, ROQueryItem},
system::{lifetimeless::*, SystemParamItem, SystemState},
};
use bevy_math::{Affine3, Mat4, Vec2, Vec4};
use bevy_math::{Affine3, Vec2, Vec4};
use bevy_render::{
batching::{
batch_and_prepare_render_phase, write_batched_instance_buffer, GetBatchData,
NoAutomaticBatching,
},
batching::{batch_and_prepare_render_phase, write_batched_instance_buffer, GetBatchData},
globals::{GlobalsBuffer, GlobalsUniform},
mesh::{
skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
GpuBufferInfo, InnerMeshVertexBufferLayout, Mesh, MeshVertexBufferLayout,
VertexAttributeDescriptor,
},
@@ -48,17 +44,13 @@ use bevy_utils::{tracing::error, HashMap, Hashed};
use crate::render::{
morph::{extract_morphs, prepare_morphs, MorphIndex, MorphUniform},
skin::{extract_skins, prepare_skins, SkinIndex, SkinUniform},
MeshLayouts,
};
#[derive(Default)]
pub struct MeshRenderPlugin;
/// Maximum number of joints supported for skinned meshes.
pub const MAX_JOINTS: usize = 256;
const JOINT_SIZE: usize = std::mem::size_of::<Mat4>();
pub(crate) const JOINT_BUFFER_SIZE: usize = MAX_JOINTS * JOINT_SIZE;
pub const MESH_VERTEX_OUTPUT: Handle<Shader> = Handle::weak_from_u128(2645551199423808407);
pub const MESH_VIEW_TYPES_HANDLE: Handle<Shader> = Handle::weak_from_u128(8140454348013264787);
pub const MESH_VIEW_BINDINGS_HANDLE: Handle<Shader> = Handle::weak_from_u128(9076678235888822571);
@@ -112,12 +104,12 @@ impl Plugin for MeshRenderPlugin {
if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
render_app
.init_resource::<SkinnedMeshUniform>()
.init_resource::<MeshBindGroups>()
.init_resource::<SkinUniform>()
.init_resource::<MorphUniform>()
.add_systems(
ExtractSchedule,
(extract_meshes, extract_skinned_meshes, extract_morphs),
(extract_meshes, extract_skins, extract_morphs),
)
.add_systems(
Render,
@@ -131,7 +123,7 @@ impl Plugin for MeshRenderPlugin {
.in_set(RenderSet::PrepareResources),
write_batched_instance_buffer::<MeshPipeline>
.in_set(RenderSet::PrepareResourcesFlush),
prepare_skinned_meshes.in_set(RenderSet::PrepareResources),
prepare_skins.in_set(RenderSet::PrepareResources),
prepare_morphs.in_set(RenderSet::PrepareResources),
prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups),
prepare_mesh_view_bind_groups.in_set(RenderSet::PrepareBindGroups),
@@ -270,91 +262,6 @@ pub fn extract_meshes(
commands.insert_or_spawn_batch(not_caster_commands);
}
#[derive(Component)]
pub struct SkinnedMeshJoints {
pub index: u32,
}
impl SkinnedMeshJoints {
#[inline]
pub fn build(
skin: &SkinnedMesh,
inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
joints: &Query<&GlobalTransform>,
buffer: &mut BufferVec<Mat4>,
) -> Option<Self> {
let inverse_bindposes = inverse_bindposes.get(&skin.inverse_bindposes)?;
let start = buffer.len();
let target = start + skin.joints.len().min(MAX_JOINTS);
buffer.extend(
joints
.iter_many(&skin.joints)
.zip(inverse_bindposes.iter())
.take(MAX_JOINTS)
.map(|(joint, bindpose)| joint.affine() * *bindpose),
);
// iter_many will skip any failed fetches. This will cause it to assign the wrong bones,
// so just bail by truncating to the start.
if buffer.len() != target {
buffer.truncate(start);
return None;
}
// Pad to 256 byte alignment
while buffer.len() % 4 != 0 {
buffer.push(Mat4::ZERO);
}
Some(Self {
index: start as u32,
})
}
/// Updated index to be in address space based on [`SkinnedMeshUniform`] size.
pub fn to_buffer_index(mut self) -> Self {
self.index *= std::mem::size_of::<Mat4>() as u32;
self
}
}
pub fn extract_skinned_meshes(
mut commands: Commands,
mut previous_len: Local<usize>,
mut uniform: ResMut<SkinnedMeshUniform>,
query: Extract<Query<(Entity, &ViewVisibility, &SkinnedMesh)>>,
inverse_bindposes: Extract<Res<Assets<SkinnedMeshInverseBindposes>>>,
joint_query: Extract<Query<&GlobalTransform>>,
) {
uniform.buffer.clear();
let mut values = Vec::with_capacity(*previous_len);
let mut last_start = 0;
for (entity, view_visibility, skin) in &query {
if !view_visibility.get() {
continue;
}
// PERF: This can be expensive, can we move this to prepare?
if let Some(skinned_joints) =
SkinnedMeshJoints::build(skin, &inverse_bindposes, &joint_query, &mut uniform.buffer)
{
last_start = last_start.max(skinned_joints.index as usize);
// NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per
// entity and so cannot currently be batched.
values.push((
entity,
(skinned_joints.to_buffer_index(), NoAutomaticBatching),
));
}
}
// Pad out the buffer to ensure that there's enough space for bindings
while uniform.buffer.len() - last_start < MAX_JOINTS {
uniform.buffer.push(Mat4::ZERO);
}
*previous_len = values.len();
commands.insert_or_spawn_batch(values);
}
#[derive(Resource, Clone)]
pub struct MeshPipeline {
pub view_layout: BindGroupLayout,
@@ -1043,7 +950,7 @@ pub fn prepare_mesh_bind_group(
mesh_pipeline: Res<MeshPipeline>,
render_device: Res<RenderDevice>,
mesh_uniforms: Res<GpuArrayBuffer<MeshUniform>>,
skinned_mesh_uniform: Res<SkinnedMeshUniform>,
skins_uniform: Res<SkinUniform>,
weights_uniform: Res<MorphUniform>,
) {
groups.reset();
@@ -1053,7 +960,7 @@ pub fn prepare_mesh_bind_group(
};
groups.model_only = Some(layouts.model_only(&render_device, &model));
let skin = skinned_mesh_uniform.buffer.buffer();
let skin = skins_uniform.buffer.buffer();
if let Some(skin) = skin {
groups.skinned = Some(layouts.skinned(&render_device, &model, skin));
}
@@ -1072,41 +979,6 @@ pub fn prepare_mesh_bind_group(
}
}
// NOTE: This is using BufferVec because it is using a trick to allow a fixed-size array
// in a uniform buffer to be used like a variable-sized array by only writing the valid data
// into the buffer, knowing the number of valid items starting from the dynamic offset, and
// ignoring the rest, whether they're valid for other dynamic offsets or not. This trick may
// be supported later in encase, and then we should make use of it.
#[derive(Resource)]
pub struct SkinnedMeshUniform {
pub buffer: BufferVec<Mat4>,
}
impl Default for SkinnedMeshUniform {
fn default() -> Self {
Self {
buffer: BufferVec::new(BufferUsages::UNIFORM),
}
}
}
pub fn prepare_skinned_meshes(
render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>,
mut skinned_mesh_uniform: ResMut<SkinnedMeshUniform>,
) {
if skinned_mesh_uniform.buffer.is_empty() {
return;
}
let len = skinned_mesh_uniform.buffer.len();
skinned_mesh_uniform.buffer.reserve(len, &render_device);
skinned_mesh_uniform
.buffer
.write_buffer(&render_device, &render_queue);
}
#[derive(Component)]
pub struct MeshViewBindGroup {
pub value: BindGroup,
@@ -1308,7 +1180,7 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
type ViewWorldQuery = ();
type ItemWorldQuery = (
Read<Handle<Mesh>>,
Option<Read<SkinnedMeshJoints>>,
Option<Read<SkinIndex>>,
Option<Read<MorphIndex>>,
);

@@ -1,5 +1,6 @@
//! Bind group layout related definitions for the mesh pipeline.
use bevy_math::Mat4;
use bevy_render::{
mesh::morph::MAX_MORPH_WEIGHTS,
render_resource::{
@@ -9,13 +10,17 @@ use bevy_render::{
renderer::RenderDevice,
};
use crate::render::skin::MAX_JOINTS;
const MORPH_WEIGHT_SIZE: usize = std::mem::size_of::<f32>();
pub const MORPH_BUFFER_SIZE: usize = MAX_MORPH_WEIGHTS * MORPH_WEIGHT_SIZE;
const JOINT_SIZE: usize = std::mem::size_of::<Mat4>();
pub(crate) const JOINT_BUFFER_SIZE: usize = MAX_JOINTS * JOINT_SIZE;
/// Individual layout entries.
mod layout_entry {
use super::MORPH_BUFFER_SIZE;
use crate::render::mesh::JOINT_BUFFER_SIZE;
use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE};
use crate::MeshUniform;
use bevy_render::{
render_resource::{
@@ -66,8 +71,7 @@ mod layout_entry {
/// Individual [`BindGroupEntry`](bevy_render::render_resource::BindGroupEntry)
/// for bind groups.
mod entry {
use super::MORPH_BUFFER_SIZE;
use crate::render::mesh::JOINT_BUFFER_SIZE;
use super::{JOINT_BUFFER_SIZE, MORPH_BUFFER_SIZE};
use bevy_render::render_resource::{
BindGroupEntry, BindingResource, Buffer, BufferBinding, BufferSize, TextureView,
};

@@ -3,8 +3,10 @@ mod light;
pub(crate) mod mesh;
mod mesh_bindings;
mod morph;
mod skin;
pub use fog::*;
pub use light::*;
pub use mesh::*;
pub use mesh_bindings::MeshLayouts;
pub use skin::{extract_skins, prepare_skins, SkinIndex, SkinUniform, MAX_JOINTS};

@@ -28,16 +28,16 @@ impl Default for MorphUniform {
}
pub fn prepare_morphs(
device: Res<RenderDevice>,
queue: Res<RenderQueue>,
render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>,
mut uniform: ResMut<MorphUniform>,
) {
if uniform.buffer.is_empty() {
return;
}
let buffer = &mut uniform.buffer;
buffer.reserve(buffer.len(), &device);
buffer.write_buffer(&device, &queue);
let len = uniform.buffer.len();
uniform.buffer.reserve(len, &render_device);
uniform.buffer.write_buffer(&render_device, &render_queue);
}
const fn can_align(step: usize, target: usize) -> bool {
@@ -69,6 +69,8 @@ fn add_to_alignment<T: Pod + Default>(buffer: &mut BufferVec<T>) {
buffer.extend(iter::repeat_with(T::default).take(ts_to_add));
}
// Notes on implementation: see comment on top of the extract_skins system in skin module.
// This works similarly, but for `f32` instead of `Mat4`
pub fn extract_morphs(
mut commands: Commands,
mut previous_len: Local<usize>,

@@ -0,0 +1,139 @@
use bevy_asset::Assets;
use bevy_ecs::prelude::*;
use bevy_math::Mat4;
use bevy_render::{
batching::NoAutomaticBatching,
mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
render_resource::{BufferUsages, BufferVec},
renderer::{RenderDevice, RenderQueue},
view::ViewVisibility,
Extract,
};
use bevy_transform::prelude::GlobalTransform;
/// Maximum number of joints supported for skinned meshes.
pub const MAX_JOINTS: usize = 256;
#[derive(Component)]
pub struct SkinIndex {
pub index: u32,
}
impl SkinIndex {
/// Index to be in address space based on [`SkinUniform`] size.
const fn new(start: usize) -> Self {
SkinIndex {
index: (start * std::mem::size_of::<Mat4>()) as u32,
}
}
}
// Notes on implementation: see comment on top of the `extract_skins` system.
#[derive(Resource)]
pub struct SkinUniform {
pub buffer: BufferVec<Mat4>,
}
impl Default for SkinUniform {
fn default() -> Self {
Self {
buffer: BufferVec::new(BufferUsages::UNIFORM),
}
}
}
pub fn prepare_skins(
render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>,
mut uniform: ResMut<SkinUniform>,
) {
if uniform.buffer.is_empty() {
return;
}
let len = uniform.buffer.len();
uniform.buffer.reserve(len, &render_device);
uniform.buffer.write_buffer(&render_device, &render_queue);
}
// Notes on implementation:
// We define the uniform binding as an array<mat4x4<f32>, N> in the shader,
// where N is the maximum number of Mat4s we can fit in the uniform binding,
// which may be as little as 16kB or 64kB. But, we may not need all N.
// We may only need, for example, 10.
//
// If we used uniform buffers normally then we would have to write a full
// binding of data for each dynamic offset binding, which is wasteful, makes
// the buffer much larger than it needs to be, and uses more memory bandwidth
// to transfer the data, which then costs frame time. So @superdump came up
// with this design: just bind data at the specified offset and interpret
// the data at that offset as an array<T, N> regardless of what is there.
//
// So instead of writing N Mat4s when you only need 10, you write 10, and
// then pad up to the next dynamic offset alignment. Then write the next.
// And for the last dynamic offset binding, make sure there is a full binding
// of data after it so that the buffer is of size
// `last dynamic offset` + `array<mat4x4<f32>>`.
//
// Then when binding the first dynamic offset, the first 10 entries in the array
// are what you expect, but if you read the 11th you're reading invalid data
// which could be padding or could be from the next binding.
//
// In this way, we can pack variable sized arrays into uniform buffer bindings
// which normally only support fixed size arrays. You just have to make sure
// in the shader that you only read the values that are valid for that binding.
pub fn extract_skins(
mut commands: Commands,
mut previous_len: Local<usize>,
mut uniform: ResMut<SkinUniform>,
query: Extract<Query<(Entity, &ViewVisibility, &SkinnedMesh)>>,
inverse_bindposes: Extract<Res<Assets<SkinnedMeshInverseBindposes>>>,
joints: Extract<Query<&GlobalTransform>>,
) {
uniform.buffer.clear();
let mut values = Vec::with_capacity(*previous_len);
let mut last_start = 0;
// PERF: This can be expensive, can we move this to prepare?
for (entity, view_visibility, skin) in &query {
if !view_visibility.get() {
continue;
}
let buffer = &mut uniform.buffer;
let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else {
continue;
};
let start = buffer.len();
let target = start + skin.joints.len().min(MAX_JOINTS);
buffer.extend(
joints
.iter_many(&skin.joints)
.zip(inverse_bindposes.iter())
.take(MAX_JOINTS)
.map(|(joint, bindpose)| joint.affine() * *bindpose),
);
// iter_many will skip any failed fetches. This will cause it to assign the wrong bones,
// so just bail by truncating to the start.
if buffer.len() != target {
buffer.truncate(start);
continue;
}
last_start = last_start.max(start);
// Pad to 256 byte alignment
while buffer.len() % 4 != 0 {
buffer.push(Mat4::ZERO);
}
// NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per
// entity and so cannot currently be batched.
values.push((entity, (SkinIndex::new(start), NoAutomaticBatching)));
}
// Pad out the buffer to ensure that there's enough space for bindings
while uniform.buffer.len() - last_start < MAX_JOINTS {
uniform.buffer.push(Mat4::ZERO);
}
*previous_len = values.len();
commands.insert_or_spawn_batch(values);
}
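
To make the dynamic-offset packing described in the comment above concrete, here is a small standalone arithmetic sketch. It is illustrative only and not part of the commit; it assumes `Mat4` is 64 bytes and uses the module's `MAX_JOINTS` of 256, and the two-skin scenario (10 and 30 joints) is made up for the example:

```rust
// Walks through the offsets extract_skins would produce for two skins,
// one with 10 joints and one with 30, using plain integers.
const MAT4_SIZE: usize = 64; // std::mem::size_of::<Mat4>()
const MAX_JOINTS: usize = 256;

fn pad_to_alignment(len: &mut usize) {
    // Pad to a multiple of 4 Mat4s, i.e. the 256-byte dynamic offset alignment.
    while *len % 4 != 0 {
        *len += 1;
    }
}

fn main() {
    let mut len = 0usize; // buffer length in Mat4 elements, like SkinUniform::buffer

    // First skin: 10 joints starting at element 0 -> dynamic offset 0 bytes.
    let start_a = len;
    len += 10;
    pad_to_alignment(&mut len);
    let offset_a = start_a * MAT4_SIZE; // what SkinIndex::new(start_a) stores

    // Second skin: 30 joints starting where the padded first one ended.
    let start_b = len;
    len += 30;
    pad_to_alignment(&mut len);
    let offset_b = start_b * MAT4_SIZE;

    // Final padding so the last binding can still read a full array<mat4x4<f32>, 256>.
    let last_start = start_b;
    while len - last_start < MAX_JOINTS {
        len += 1;
    }

    assert_eq!(offset_a, 0); // first binding reads from byte 0
    assert_eq!(start_b, 12); // 10 joints padded up to 12 elements
    assert_eq!(offset_b, 768); // 12 elements * 64 bytes
    assert_eq!(len, 268); // 12 + 256 elements; 268 * 64 = 17_152 bytes total
}
```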