improve compile time by type-erasing wgpu structs (#5950)

# Objective

Structs containing wgpu types take a long time to compile. This is particularly bad for generic types that contain wgpu structs (like the depth pipeline builder with `#[derive(SystemParam)]` that I've been working on).

We can avoid that by boxing and type-erasing the wgpu values inside the bevy `render_resource` wrappers.

Type-system magic is not a strength of mine, so there is probably a cleaner way to achieve this. I'm happy to take feedback, or for this to be treated as a proof of concept if someone else wants to do a better job.

## Solution

- add macros to box and type-erase in debug mode
- leave current impl for release mode

timings:


<html xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns="http://www.w3.org/TR/REC-html40">

<head>

<meta name=ProgId content=Excel.Sheet>
<meta name=Generator content="Microsoft Excel 15">
<link id=Main-File rel=Main-File
href="file:///C:/Users/robfm/AppData/Local/Temp/msohtmlclip1/01/clip.htm">
<link rel=File-List
href="file:///C:/Users/robfm/AppData/Local/Temp/msohtmlclip1/01/clip_filelist.xml">
<!--table
	{mso-displayed-decimal-separator:"\.";
	mso-displayed-thousand-separator:"\,";}
@page
	{margin:.75in .7in .75in .7in;
	mso-header-margin:.3in;
	mso-footer-margin:.3in;}
tr
	{mso-height-source:auto;}
col
	{mso-width-source:auto;}
br
	{mso-data-placement:same-cell;}
td
	{padding-top:1px;
	padding-right:1px;
	padding-left:1px;
	mso-ignore:padding;
	color:black;
	font-size:11.0pt;
	font-weight:400;
	font-style:normal;
	text-decoration:none;
	font-family:Calibri, sans-serif;
	mso-font-charset:0;
	mso-number-format:General;
	text-align:general;
	vertical-align:bottom;
	border:none;
	mso-background-source:auto;
	mso-pattern:auto;
	mso-protection:locked visible;
	white-space:nowrap;
	mso-rotate:0;}
.xl65
	{mso-number-format:0%;}
.xl66
	{vertical-align:middle;
	white-space:normal;}
.xl67
	{vertical-align:middle;}
-->
</head>

<body link="#0563C1" vlink="#954F72">



current |   |   |  
-- | -- | -- | --
  | Total time: | 64.9s |  
  | bevy_pbr v0.9.0-dev | 19.2s |  
  | bevy_render v0.9.0-dev | 17.0s |  
  | bevy_sprite v0.9.0-dev | 15.1s |  
  | DepthPipelineBuilder | 18.7s |  
  |   |   |  
with type-erasing |   |   | diff
  | Total time: | 49.0s | -24%
  | bevy_render v0.9.0-dev | 12.0s | -38%
  | bevy_pbr v0.9.0-dev | 8.7s | -49%
  | bevy_sprite v0.9.0-dev | 6.1s | -60%
  | DepthPipelineBuilder | 1.2s | -94%



</body>

</html>

The depth pipeline builder used for the timings above is a binary with the following body:
```rust
use std::{marker::PhantomData, hash::Hash};
use bevy::{prelude::*, ecs::system::SystemParam, pbr::{RenderMaterials, MaterialPipeline, ShadowPipeline}, render::{renderer::RenderDevice, render_resource::{SpecializedMeshPipelines, PipelineCache}, render_asset::RenderAssets}};

fn main() {
    println!("Hello, world p!\n");
}

#[derive(SystemParam)]
pub struct DepthPipelineBuilder<'w, 's, M: Material> 
where M::Data: Eq + Hash + Clone,
{
    render_device: Res<'w, RenderDevice>,
    material_pipeline: Res<'w, MaterialPipeline<M>>,
    material_pipelines: ResMut<'w, SpecializedMeshPipelines<MaterialPipeline<M>>>,
    shadow_pipeline: Res<'w, ShadowPipeline>,
    pipeline_cache: ResMut<'w, PipelineCache>,
    render_meshes: Res<'w, RenderAssets<Mesh>>,
    render_materials: Res<'w, RenderMaterials<M>>,
    msaa: Res<'w, Msaa>,
    #[system_param(ignore)]
    _p: PhantomData<&'s M>,
}
```
This commit is contained in:
robtfm 2022-11-18 22:04:23 +00:00
parent 5972879dec
commit 2cd0bd7575
10 changed files with 202 additions and 47 deletions

View file

@ -9,9 +9,12 @@ use crate::{
texture::FallbackImage,
};
use bevy_reflect::Uuid;
use std::{ops::Deref, sync::Arc};
use std::ops::Deref;
use wgpu::BindingResource;
use crate::render_resource::resource_macros::*;
render_resource_wrapper!(ErasedBindGroup, wgpu::BindGroup);
/// A [`BindGroup`] identifier.
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct BindGroupId(Uuid);
@ -25,7 +28,7 @@ pub struct BindGroupId(Uuid);
#[derive(Clone, Debug)]
pub struct BindGroup {
id: BindGroupId,
value: Arc<wgpu::BindGroup>,
value: ErasedBindGroup,
}
impl BindGroup {
@ -40,7 +43,7 @@ impl From<wgpu::BindGroup> for BindGroup {
fn from(value: wgpu::BindGroup) -> Self {
BindGroup {
id: BindGroupId(Uuid::new_v4()),
value: Arc::new(value),
value: ErasedBindGroup::new(value),
}
}
}

View file

@ -1,13 +1,16 @@
use crate::render_resource::resource_macros::*;
use bevy_reflect::Uuid;
use std::{ops::Deref, sync::Arc};
use std::ops::Deref;
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct BindGroupLayoutId(Uuid);
render_resource_wrapper!(ErasedBindGroupLayout, wgpu::BindGroupLayout);
#[derive(Clone, Debug)]
pub struct BindGroupLayout {
id: BindGroupLayoutId,
value: Arc<wgpu::BindGroupLayout>,
value: ErasedBindGroupLayout,
}
impl PartialEq for BindGroupLayout {
@ -32,7 +35,7 @@ impl From<wgpu::BindGroupLayout> for BindGroupLayout {
fn from(value: wgpu::BindGroupLayout) -> Self {
BindGroupLayout {
id: BindGroupLayoutId(Uuid::new_v4()),
value: Arc::new(value),
value: ErasedBindGroupLayout::new(value),
}
}
}

View file

@ -1,16 +1,17 @@
use bevy_utils::Uuid;
use std::{
ops::{Bound, Deref, RangeBounds},
sync::Arc,
};
use std::ops::{Bound, Deref, RangeBounds};
use crate::render_resource::resource_macros::*;
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct BufferId(Uuid);
render_resource_wrapper!(ErasedBuffer, wgpu::Buffer);
#[derive(Clone, Debug)]
pub struct Buffer {
id: BufferId,
value: Arc<wgpu::Buffer>,
value: ErasedBuffer,
}
impl Buffer {
@ -42,7 +43,7 @@ impl From<wgpu::Buffer> for Buffer {
fn from(value: wgpu::Buffer) -> Self {
Buffer {
id: BufferId(Uuid::new_v4()),
value: Arc::new(value),
value: ErasedBuffer::new(value),
}
}
}

View file

@ -5,6 +5,7 @@ mod buffer_vec;
mod pipeline;
mod pipeline_cache;
mod pipeline_specializer;
pub mod resource_macros;
mod shader;
mod storage_buffer;
mod texture;

View file

@ -1,16 +1,20 @@
use crate::render_resource::{BindGroupLayout, Shader};
use bevy_asset::Handle;
use bevy_reflect::Uuid;
use std::{borrow::Cow, ops::Deref, sync::Arc};
use std::{borrow::Cow, ops::Deref};
use wgpu::{
BufferAddress, ColorTargetState, DepthStencilState, MultisampleState, PrimitiveState,
VertexAttribute, VertexFormat, VertexStepMode,
};
use crate::render_resource::resource_macros::*;
/// A [`RenderPipeline`] identifier.
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct RenderPipelineId(Uuid);
render_resource_wrapper!(ErasedRenderPipeline, wgpu::RenderPipeline);
/// A [`RenderPipeline`] represents a graphics pipeline and its stages (shaders), bindings and vertex buffers.
///
/// May be converted from and dereferences to a wgpu [`RenderPipeline`](wgpu::RenderPipeline).
@ -18,7 +22,7 @@ pub struct RenderPipelineId(Uuid);
#[derive(Clone, Debug)]
pub struct RenderPipeline {
id: RenderPipelineId,
value: Arc<wgpu::RenderPipeline>,
value: ErasedRenderPipeline,
}
impl RenderPipeline {
@ -32,7 +36,7 @@ impl From<wgpu::RenderPipeline> for RenderPipeline {
fn from(value: wgpu::RenderPipeline) -> Self {
RenderPipeline {
id: RenderPipelineId(Uuid::new_v4()),
value: Arc::new(value),
value: ErasedRenderPipeline::new(value),
}
}
}
@ -50,6 +54,8 @@ impl Deref for RenderPipeline {
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct ComputePipelineId(Uuid);
render_resource_wrapper!(ErasedComputePipeline, wgpu::ComputePipeline);
/// A [`ComputePipeline`] represents a compute pipeline and its single shader stage.
///
/// May be converted from and dereferences to a wgpu [`ComputePipeline`](wgpu::ComputePipeline).
@ -57,7 +63,7 @@ pub struct ComputePipelineId(Uuid);
#[derive(Clone, Debug)]
pub struct ComputePipeline {
id: ComputePipelineId,
value: Arc<wgpu::ComputePipeline>,
value: ErasedComputePipeline,
}
impl ComputePipeline {
@ -72,7 +78,7 @@ impl From<wgpu::ComputePipeline> for ComputePipeline {
fn from(value: wgpu::ComputePipeline) -> Self {
ComputePipeline {
id: ComputePipelineId(Uuid::new_v4()),
value: Arc::new(value),
value: ErasedComputePipeline::new(value),
}
}
}

View file

@ -17,13 +17,17 @@ use bevy_utils::{
tracing::{debug, error},
Entry, HashMap, HashSet,
};
use std::{hash::Hash, iter::FusedIterator, mem, ops::Deref, sync::Arc};
use std::{hash::Hash, iter::FusedIterator, mem, ops::Deref};
use thiserror::Error;
use wgpu::{
BufferBindingType, PipelineLayoutDescriptor, ShaderModule,
VertexBufferLayout as RawVertexBufferLayout,
BufferBindingType, PipelineLayoutDescriptor, VertexBufferLayout as RawVertexBufferLayout,
};
use crate::render_resource::resource_macros::*;
render_resource_wrapper!(ErasedShaderModule, wgpu::ShaderModule);
render_resource_wrapper!(ErasedPipelineLayout, wgpu::PipelineLayout);
/// A descriptor for a [`Pipeline`].
///
/// Used to store a heterogeneous collection of render and compute pipeline descriptors together.
@ -103,7 +107,7 @@ impl CachedPipelineState {
#[derive(Default)]
struct ShaderData {
pipelines: HashSet<CachedPipelineId>,
processed_shaders: HashMap<Vec<String>, Arc<ShaderModule>>,
processed_shaders: HashMap<Vec<String>, ErasedShaderModule>,
resolved_imports: HashMap<ShaderImport, Handle<Shader>>,
dependents: HashSet<Handle<Shader>>,
}
@ -124,7 +128,7 @@ impl ShaderCache {
pipeline: CachedPipelineId,
handle: &Handle<Shader>,
shader_defs: &[String],
) -> Result<Arc<ShaderModule>, PipelineCacheError> {
) -> Result<ErasedShaderModule, PipelineCacheError> {
let shader = self
.shaders
.get(handle)
@ -204,7 +208,7 @@ impl ShaderCache {
return Err(PipelineCacheError::CreateShaderModule(description));
}
entry.insert(Arc::new(shader_module))
entry.insert(ErasedShaderModule::new(shader_module))
}
};
@ -276,7 +280,7 @@ impl ShaderCache {
#[derive(Default)]
struct LayoutCache {
layouts: HashMap<Vec<BindGroupLayoutId>, wgpu::PipelineLayout>,
layouts: HashMap<Vec<BindGroupLayoutId>, ErasedPipelineLayout>,
}
impl LayoutCache {
@ -291,10 +295,12 @@ impl LayoutCache {
.iter()
.map(|l| l.value())
.collect::<Vec<_>>();
render_device.create_pipeline_layout(&PipelineLayoutDescriptor {
ErasedPipelineLayout::new(render_device.create_pipeline_layout(
&PipelineLayoutDescriptor {
bind_group_layouts: &bind_group_layouts,
..default()
})
},
))
})
}
}

View file

@ -0,0 +1,123 @@
// Structs containing wgpu types take a long time to compile. This is particularly bad for generic
// structs containing wgpu structs. We avoid that in debug builds (and for `cargo check` and
// rust-analyzer) by boxing and type-erasing with the `render_resource_wrapper` macro.
// Analysis from https://github.com/bevyengine/bevy/pull/5950#issuecomment-1243473071 indicates this is
// due to `evaluate_obligations`. We should check whether this can be removed after a fix lands for
// https://github.com/rust-lang/rust/issues/99188 (and after other `evaluate_obligations`-related changes).
/// Generates `$wrapper_type`: a clonable, reference-counted handle that owns a `$wgpu_type`
/// and dereferences to it.
///
/// The generated type exposes `new`, `try_unwrap` and `Deref<Target = $wgpu_type>`, and the
/// expansion fails to compile unless `$wgpu_type` is `Send + Sync`.
///
/// This is the variant used when `debug_assertions` is enabled: the value is boxed and stored
/// as `Arc<Box<()>>`, so the wrapper's field type does not mention `$wgpu_type`. Every access
/// reinterprets the erased box as `Box<$wgpu_type>` again before touching or dropping it.
#[cfg(debug_assertions)]
#[macro_export]
macro_rules! render_resource_wrapper {
    ($wrapper_type:ident, $wgpu_type:ty) => {
        /// Type-erased, reference-counted handle to the wrapped value.
        ///
        /// The field is `None` only after the inner `Arc` has been taken by `try_unwrap`
        /// or by `Drop`.
        #[derive(Clone, Debug)]
        pub struct $wrapper_type(Option<std::sync::Arc<Box<()>>>);

        impl $wrapper_type {
            /// Moves `value` to the heap and wraps it in a new handle.
            pub fn new(value: $wgpu_type) -> Self {
                // SAFETY: both box types are pointers to sized pointees, so they have the same
                // size (checked by `transmute` at compile time). The erased box is never
                // dereferenced or dropped as `Box<()>`: `try_unwrap`, `deref` and `drop` all
                // reinterpret it as `Box<$wgpu_type>` first.
                let untyped_box = unsafe {
                    std::mem::transmute::<Box<$wgpu_type>, Box<()>>(Box::new(value))
                };
                Self(Some(std::sync::Arc::new(untyped_box)))
            }

            /// Returns the wrapped value if this is the only handle to it, `None` otherwise.
            ///
            /// The handle is consumed either way.
            pub fn try_unwrap(mut self) -> Option<$wgpu_type> {
                // Taking the `Arc` leaves `None` behind, so the `Drop` impl that runs when
                // `self` goes out of scope has nothing left to release.
                let inner = self.0.take()?;
                match std::sync::Arc::try_unwrap(inner) {
                    Ok(untyped_box) => {
                        // SAFETY: the box was created in `new` from a `Box<$wgpu_type>`.
                        let typed_box = unsafe {
                            std::mem::transmute::<Box<()>, Box<$wgpu_type>>(untyped_box)
                        };
                        Some(*typed_box)
                    }
                    Err(shared) => {
                        // Other handles are still alive: release only this reference, but do
                        // it through the typed `Arc` so that, should it turn out to be the
                        // last one after all, the value's real destructor runs.
                        // SAFETY: the boxed value was created in `new` from a
                        // `Box<$wgpu_type>`; only the type parameter of the `Arc` changes.
                        std::mem::drop(unsafe {
                            std::mem::transmute::<
                                std::sync::Arc<Box<()>>,
                                std::sync::Arc<Box<$wgpu_type>>,
                            >(shared)
                        });
                        None
                    }
                }
            }
        }

        impl std::ops::Deref for $wrapper_type {
            type Target = $wgpu_type;

            /// Borrows the wrapped value.
            ///
            /// Panics if the inner value has already been taken.
            fn deref(&self) -> &Self::Target {
                let shared = self.0.as_ref().expect(
                    "render_resource_wrapper inner value has already been taken (via drop or try_unwrap)",
                );
                let untyped_box: &Box<()> = &**shared;
                // SAFETY: the box was created in `new` from a `Box<$wgpu_type>`, and the
                // returned borrow cannot outlive `self`, which keeps the `Arc` alive.
                let typed_box =
                    unsafe { std::mem::transmute::<&Box<()>, &Box<$wgpu_type>>(untyped_box) };
                &**typed_box
            }
        }

        impl Drop for $wrapper_type {
            fn drop(&mut self) {
                if let Some(inner) = self.0.take() {
                    // Dropping an `Arc<Box<()>>` would never run the value's destructor, so
                    // restore the real type before releasing this reference.
                    // SAFETY: the boxed value was created in `new` from a `Box<$wgpu_type>`;
                    // only the type parameter of the `Arc` changes.
                    std::mem::drop(unsafe {
                        std::mem::transmute::<
                            std::sync::Arc<Box<()>>,
                            std::sync::Arc<Box<$wgpu_type>>,
                        >(inner)
                    });
                }
            }
        }

        // Arc<Box<()>> and Arc<()> will be Sync and Send even when $wgpu_type is not Sync or Send.
        // We ensure correctness by checking that $wgpu_type does implement Send and Sync.
        // If in future there is a case where a wrapper is required for a non-send/sync type
        // we can implement a macro variant that also does `impl !Send for $wrapper_type {}` and
        // `impl !Sync for $wrapper_type {}`
        const _: () = {
            trait AssertSendSyncBound: Send + Sync {}
            impl AssertSendSyncBound for $wgpu_type {}
        };
    };
}

/// Generates `$wrapper_type`: a clonable, reference-counted handle that owns a `$wgpu_type`
/// and dereferences to it.
///
/// The generated type exposes `new`, `try_unwrap` and `Deref<Target = $wgpu_type>`, and the
/// expansion fails to compile unless `$wgpu_type` is `Send + Sync`.
///
/// This is the variant used when `debug_assertions` is disabled: a plain newtype around
/// `Arc<$wgpu_type>` with no type erasure.
#[cfg(not(debug_assertions))]
#[macro_export]
macro_rules! render_resource_wrapper {
    ($wrapper_type:ident, $wgpu_type:ty) => {
        /// Reference-counted handle to the wrapped value.
        #[derive(Clone, Debug)]
        pub struct $wrapper_type(std::sync::Arc<$wgpu_type>);

        impl $wrapper_type {
            /// Wraps `value` in a new handle.
            pub fn new(value: $wgpu_type) -> Self {
                Self(std::sync::Arc::new(value))
            }

            /// Returns the wrapped value if this is the only handle to it, `None` otherwise.
            ///
            /// The handle is consumed either way.
            pub fn try_unwrap(self) -> Option<$wgpu_type> {
                std::sync::Arc::try_unwrap(self.0).ok()
            }
        }

        impl std::ops::Deref for $wrapper_type {
            type Target = $wgpu_type;

            /// Borrows the wrapped value.
            fn deref(&self) -> &Self::Target {
                &*self.0
            }
        }

        // Same compile-time check as the debug variant, so a type that is not `Send + Sync`
        // is rejected in every build profile.
        const _: () = {
            trait AssertSendSyncBound: Send + Sync {}
            impl AssertSendSyncBound for $wgpu_type {}
        };
    };
}
pub use render_resource_wrapper;

View file

@ -1,10 +1,14 @@
use bevy_utils::Uuid;
use std::{ops::Deref, sync::Arc};
use std::ops::Deref;
use crate::render_resource::resource_macros::*;
/// A [`Texture`] identifier.
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct TextureId(Uuid);
render_resource_wrapper!(ErasedTexture, wgpu::Texture);
/// A GPU-accessible texture.
///
/// May be converted from and dereferences to a wgpu [`Texture`](wgpu::Texture).
@ -12,7 +16,7 @@ pub struct TextureId(Uuid);
#[derive(Clone, Debug)]
pub struct Texture {
id: TextureId,
value: Arc<wgpu::Texture>,
value: ErasedTexture,
}
impl Texture {
@ -32,7 +36,7 @@ impl From<wgpu::Texture> for Texture {
fn from(value: wgpu::Texture) -> Self {
Texture {
id: TextureId(Uuid::new_v4()),
value: Arc::new(value),
value: ErasedTexture::new(value),
}
}
}
@ -50,20 +54,23 @@ impl Deref for Texture {
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct TextureViewId(Uuid);
render_resource_wrapper!(ErasedTextureView, wgpu::TextureView);
render_resource_wrapper!(ErasedSurfaceTexture, wgpu::SurfaceTexture);
/// This type combines wgpu's [`TextureView`](wgpu::TextureView) and
/// [`SurfaceTexture`](wgpu::SurfaceTexture) into the same interface.
#[derive(Clone, Debug)]
pub enum TextureViewValue {
/// The value is an actual wgpu [`TextureView`](wgpu::TextureView).
TextureView(Arc<wgpu::TextureView>),
TextureView(ErasedTextureView),
/// The value is a wgpu [`SurfaceTexture`](wgpu::SurfaceTexture), but dereferences to
/// a [`TextureView`](wgpu::TextureView).
SurfaceTexture {
// NOTE: The order of these fields is important because the view must be dropped before the
// frame is dropped
view: Arc<wgpu::TextureView>,
texture: Arc<wgpu::SurfaceTexture>,
view: ErasedTextureView,
texture: ErasedSurfaceTexture,
},
}
@ -89,7 +96,7 @@ impl TextureView {
pub fn take_surface_texture(self) -> Option<wgpu::SurfaceTexture> {
match self.value {
TextureViewValue::TextureView(_) => None,
TextureViewValue::SurfaceTexture { texture, .. } => Arc::try_unwrap(texture).ok(),
TextureViewValue::SurfaceTexture { texture, .. } => texture.try_unwrap(),
}
}
}
@ -98,15 +105,15 @@ impl From<wgpu::TextureView> for TextureView {
fn from(value: wgpu::TextureView) -> Self {
TextureView {
id: TextureViewId(Uuid::new_v4()),
value: TextureViewValue::TextureView(Arc::new(value)),
value: TextureViewValue::TextureView(ErasedTextureView::new(value)),
}
}
}
impl From<wgpu::SurfaceTexture> for TextureView {
fn from(value: wgpu::SurfaceTexture) -> Self {
let texture = Arc::new(value);
let view = Arc::new(texture.texture.create_view(&Default::default()));
let view = ErasedTextureView::new(value.texture.create_view(&Default::default()));
let texture = ErasedSurfaceTexture::new(value);
TextureView {
id: TextureViewId(Uuid::new_v4()),
@ -131,6 +138,8 @@ impl Deref for TextureView {
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
pub struct SamplerId(Uuid);
render_resource_wrapper!(ErasedSampler, wgpu::Sampler);
/// A Sampler defines how a pipeline will sample from a [`TextureView`].
/// They define image filters (including anisotropy) and address (wrapping) modes, among other things.
///
@ -139,7 +148,7 @@ pub struct SamplerId(Uuid);
#[derive(Clone, Debug)]
pub struct Sampler {
id: SamplerId,
value: Arc<wgpu::Sampler>,
value: ErasedSampler,
}
impl Sampler {
@ -154,7 +163,7 @@ impl From<wgpu::Sampler> for Sampler {
fn from(value: wgpu::Sampler) -> Self {
Sampler {
id: SamplerId(Uuid::new_v4()),
value: Arc::new(value),
value: ErasedSampler::new(value),
}
}
}

View file

@ -261,8 +261,6 @@ pub async fn initialize_renderer(
)
.await
.unwrap();
let device = Arc::new(device);
let queue = Arc::new(queue);
let adapter = Arc::new(adapter);
(

View file

@ -3,20 +3,25 @@ use crate::render_resource::{
RenderPipeline, Sampler, Texture,
};
use bevy_ecs::system::Resource;
use std::sync::Arc;
use wgpu::{util::DeviceExt, BufferAsyncError, BufferBindingType};
use super::RenderQueue;
use crate::render_resource::resource_macros::*;
render_resource_wrapper!(ErasedRenderDevice, wgpu::Device);
/// This GPU device is responsible for the creation of most rendering and compute resources.
#[derive(Resource, Clone)]
pub struct RenderDevice {
device: Arc<wgpu::Device>,
device: ErasedRenderDevice,
}
impl From<Arc<wgpu::Device>> for RenderDevice {
fn from(device: Arc<wgpu::Device>) -> Self {
Self { device }
impl From<wgpu::Device> for RenderDevice {
fn from(device: wgpu::Device) -> Self {
Self {
device: ErasedRenderDevice::new(device),
}
}
}