diff --git a/assets/shaders/gpu_readback.wgsl b/assets/shaders/gpu_readback.wgsl index 4b91c9bcd7..190508c270 100644 --- a/assets/shaders/gpu_readback.wgsl +++ b/assets/shaders/gpu_readback.wgsl @@ -3,10 +3,13 @@ // This is the data that lives in the gpu only buffer @group(0) @binding(0) var data: array; +@group(0) @binding(1) var texture: texture_storage_2d; @compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3) { // We use the global_id to index the array to make sure we don't // access data used in another workgroup data[global_id.x] += 1u; + // Write the same data to the texture + textureStore(texture, vec2(i32(global_id.x), 0), vec4(data[global_id.x], 0, 0, 0)); } diff --git a/crates/bevy_render/src/gpu_readback.rs b/crates/bevy_render/src/gpu_readback.rs new file mode 100644 index 0000000000..ffbf83ab11 --- /dev/null +++ b/crates/bevy_render/src/gpu_readback.rs @@ -0,0 +1,370 @@ +use crate::{ + extract_component::ExtractComponentPlugin, + prelude::Image, + render_asset::RenderAssets, + render_resource::{Buffer, BufferUsages, Extent3d, ImageDataLayout, Texture, TextureFormat}, + renderer::{render_system, RenderDevice}, + storage::{GpuShaderStorageBuffer, ShaderStorageBuffer}, + texture::{GpuImage, TextureFormatPixelInfo}, + ExtractSchedule, MainWorld, Render, RenderApp, RenderSet, +}; +use async_channel::{Receiver, Sender}; +use bevy_app::{App, Plugin}; +use bevy_asset::Handle; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::schedule::IntoSystemConfigs; +use bevy_ecs::{ + change_detection::ResMut, + entity::Entity, + event::Event, + prelude::{Component, Resource, World}, + system::{Query, Res}, +}; +use bevy_reflect::Reflect; +use bevy_render_macros::ExtractComponent; +use bevy_utils::{default, tracing::warn, HashMap}; +use encase::internal::ReadFrom; +use encase::private::Reader; +use encase::ShaderType; +use wgpu::{CommandEncoder, COPY_BYTES_PER_ROW_ALIGNMENT}; + +/// A plugin that enables reading back gpu buffers and textures to the cpu. +pub struct GpuReadbackPlugin { + /// Describes the number of frames a buffer can be unused before it is removed from the pool in + /// order to avoid unnecessary reallocations. + max_unused_frames: usize, +} + +impl Default for GpuReadbackPlugin { + fn default() -> Self { + Self { + max_unused_frames: 10, + } + } +} + +impl Plugin for GpuReadbackPlugin { + fn build(&self, app: &mut App) { + app.add_plugins(ExtractComponentPlugin::::default()); + + if let Some(render_app) = app.get_sub_app_mut(RenderApp) { + render_app + .init_resource::() + .init_resource::() + .insert_resource(GpuReadbackMaxUnusedFrames(self.max_unused_frames)) + .add_systems(ExtractSchedule, sync_readbacks.ambiguous_with_all()) + .add_systems( + Render, + ( + prepare_buffers.in_set(RenderSet::PrepareResources), + map_buffers.after(render_system).in_set(RenderSet::Render), + ), + ); + } + } +} + +/// A component that registers the wrapped handle for gpu readback, either a texture or a buffer. +/// +/// Data is read asynchronously and will be triggered on the entity via the [`ReadbackComplete`] event +/// when complete. If this component is not removed, the readback will be attempted every frame +#[derive(Component, ExtractComponent, Clone, Debug)] +pub enum Readback { + Texture(Handle), + Buffer(Handle), +} + +impl Readback { + /// Create a readback component for a texture using the given handle. + pub fn texture(image: Handle) -> Self { + Self::Texture(image) + } + + /// Create a readback component for a buffer using the given handle. + pub fn buffer(buffer: Handle) -> Self { + Self::Buffer(buffer) + } +} + +/// An event that is triggered when a gpu readback is complete. +/// +/// The event contains the data as a `Vec`, which can be interpreted as the raw bytes of the +/// requested buffer or texture. +#[derive(Event, Deref, DerefMut, Reflect, Debug)] +#[reflect(Debug)] +pub struct ReadbackComplete(pub Vec); + +impl ReadbackComplete { + /// Convert the raw bytes of the event to a shader type. + pub fn to_shader_type(&self) -> T { + let mut val = T::default(); + let mut reader = Reader::new::(&self.0, 0).expect("Failed to create Reader"); + T::read_from(&mut val, &mut reader); + val + } +} + +#[derive(Resource)] +struct GpuReadbackMaxUnusedFrames(usize); + +struct GpuReadbackBuffer { + buffer: Buffer, + taken: bool, + frames_unused: usize, +} + +#[derive(Resource, Default)] +struct GpuReadbackBufferPool { + // Map of buffer size to list of buffers, with a flag for whether the buffer is taken and how + // many frames it has been unused for. + // TODO: We could ideally write all readback data to one big buffer per frame, the assumption + // here is that very few entities well actually be read back at once, and their size is + // unlikely to change. + buffers: HashMap>, +} + +impl GpuReadbackBufferPool { + fn get(&mut self, render_device: &RenderDevice, size: u64) -> Buffer { + let buffers = self.buffers.entry(size).or_default(); + + // find an untaken buffer for this size + if let Some(buf) = buffers.iter_mut().find(|x| !x.taken) { + buf.taken = true; + buf.frames_unused = 0; + return buf.buffer.clone(); + } + + let buffer = render_device.create_buffer(&wgpu::BufferDescriptor { + label: Some("Readback Buffer"), + size, + usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + buffers.push(GpuReadbackBuffer { + buffer: buffer.clone(), + taken: true, + frames_unused: 0, + }); + buffer + } + + // Returns the buffer to the pool so it can be used in a future frame + fn return_buffer(&mut self, buffer: &Buffer) { + let size = buffer.size(); + let buffers = self + .buffers + .get_mut(&size) + .expect("Returned buffer of untracked size"); + if let Some(buf) = buffers.iter_mut().find(|x| x.buffer.id() == buffer.id()) { + buf.taken = false; + } else { + warn!("Returned buffer that was not allocated"); + } + } + + fn update(&mut self, max_unused_frames: usize) { + for (_, buffers) in &mut self.buffers { + // Tick all the buffers + for buf in &mut *buffers { + if !buf.taken { + buf.frames_unused += 1; + } + } + + // Remove buffers that haven't been used for MAX_UNUSED_FRAMES + buffers.retain(|x| x.frames_unused < max_unused_frames); + } + + // Remove empty buffer sizes + self.buffers.retain(|_, buffers| !buffers.is_empty()); + } +} + +enum ReadbackSource { + Texture { + texture: Texture, + layout: ImageDataLayout, + size: Extent3d, + }, + Buffer { + src_start: u64, + dst_start: u64, + buffer: Buffer, + }, +} + +#[derive(Resource, Default)] +struct GpuReadbacks { + requested: Vec, + mapped: Vec, +} + +struct GpuReadback { + pub entity: Entity, + pub src: ReadbackSource, + pub buffer: Buffer, + pub rx: Receiver<(Entity, Buffer, Vec)>, + pub tx: Sender<(Entity, Buffer, Vec)>, +} + +fn sync_readbacks( + mut main_world: ResMut, + mut buffer_pool: ResMut, + mut readbacks: ResMut, + max_unused_frames: Res, +) { + readbacks.mapped.retain(|readback| { + if let Ok((entity, buffer, result)) = readback.rx.try_recv() { + main_world.trigger_targets(ReadbackComplete(result), entity); + buffer_pool.return_buffer(&buffer); + false + } else { + true + } + }); + + buffer_pool.update(max_unused_frames.0); +} + +fn prepare_buffers( + render_device: Res, + mut readbacks: ResMut, + mut buffer_pool: ResMut, + gpu_images: Res>, + ssbos: Res>, + handles: Query<(Entity, &Readback)>, +) { + for (entity, readback) in handles.iter() { + match readback { + Readback::Texture(image) => { + if let Some(gpu_image) = gpu_images.get(image) { + let size = Extent3d { + width: gpu_image.size.x, + height: gpu_image.size.y, + ..default() + }; + let layout = layout_data(size.width, size.height, gpu_image.texture_format); + let buffer = buffer_pool.get( + &render_device, + get_aligned_size( + size.width, + size.height, + gpu_image.texture_format.pixel_size() as u32, + ) as u64, + ); + let (tx, rx) = async_channel::bounded(1); + readbacks.requested.push(GpuReadback { + entity, + src: ReadbackSource::Texture { + texture: gpu_image.texture.clone(), + layout, + size, + }, + buffer, + rx, + tx, + }); + } + } + Readback::Buffer(buffer) => { + if let Some(ssbo) = ssbos.get(buffer) { + let size = ssbo.buffer.size(); + let buffer = buffer_pool.get(&render_device, size); + let (tx, rx) = async_channel::bounded(1); + readbacks.requested.push(GpuReadback { + entity, + src: ReadbackSource::Buffer { + src_start: 0, + dst_start: 0, + buffer: ssbo.buffer.clone(), + }, + buffer, + rx, + tx, + }); + } + } + } + } +} + +pub(crate) fn submit_readback_commands(world: &World, command_encoder: &mut CommandEncoder) { + let readbacks = world.resource::(); + for readback in &readbacks.requested { + match &readback.src { + ReadbackSource::Texture { + texture, + layout, + size, + } => { + command_encoder.copy_texture_to_buffer( + texture.as_image_copy(), + wgpu::ImageCopyBuffer { + buffer: &readback.buffer, + layout: *layout, + }, + *size, + ); + } + ReadbackSource::Buffer { + src_start, + dst_start, + buffer, + } => { + command_encoder.copy_buffer_to_buffer( + buffer, + *src_start, + &readback.buffer, + *dst_start, + buffer.size(), + ); + } + } + } +} + +/// Move requested readbacks to mapped readbacks after commands have been submitted in render system +fn map_buffers(mut readbacks: ResMut) { + let requested = readbacks.requested.drain(..).collect::>(); + for readback in requested { + let slice = readback.buffer.slice(..); + let entity = readback.entity; + let buffer = readback.buffer.clone(); + let tx = readback.tx.clone(); + slice.map_async(wgpu::MapMode::Read, move |res| { + res.expect("Failed to map buffer"); + let buffer_slice = buffer.slice(..); + let data = buffer_slice.get_mapped_range(); + let result = Vec::from(&*data); + drop(data); + buffer.unmap(); + if let Err(e) = tx.try_send((entity, buffer, result)) { + warn!("Failed to send readback result: {:?}", e); + } + }); + readbacks.mapped.push(readback); + } +} + +// Utils + +pub(crate) fn align_byte_size(value: u32) -> u32 { + value + (COPY_BYTES_PER_ROW_ALIGNMENT - (value % COPY_BYTES_PER_ROW_ALIGNMENT)) +} + +pub(crate) fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 { + height * align_byte_size(width * pixel_size) +} + +pub(crate) fn layout_data(width: u32, height: u32, format: TextureFormat) -> ImageDataLayout { + ImageDataLayout { + bytes_per_row: if height > 1 { + // 1 = 1 row + Some(get_aligned_size(width, 1, format.pixel_size() as u32)) + } else { + None + }, + rows_per_image: None, + ..Default::default() + } +} diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index 9cd0b28d1c..c69ddac032 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -25,6 +25,7 @@ mod extract_param; pub mod extract_resource; pub mod globals; pub mod gpu_component_array_buffer; +pub mod gpu_readback; pub mod mesh; #[cfg(not(target_arch = "wasm32"))] pub mod pipelined_rendering; @@ -73,6 +74,7 @@ use globals::GlobalsPlugin; use render_asset::RenderAssetBytesPerFrame; use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue}; +use crate::gpu_readback::GpuReadbackPlugin; use crate::{ camera::CameraPlugin, mesh::{morph::MorphPlugin, MeshPlugin, RenderMesh}, @@ -363,6 +365,7 @@ impl Plugin for RenderPlugin { MorphPlugin, BatchingPlugin, StoragePlugin, + GpuReadbackPlugin::default(), )); app.init_resource::() diff --git a/crates/bevy_render/src/renderer/mod.rs b/crates/bevy_render/src/renderer/mod.rs index c45995ee77..51b3522875 100644 --- a/crates/bevy_render/src/renderer/mod.rs +++ b/crates/bevy_render/src/renderer/mod.rs @@ -46,6 +46,7 @@ pub fn render_system(world: &mut World, state: &mut SystemState u32 { - value + (COPY_BYTES_PER_ROW_ALIGNMENT - (value % COPY_BYTES_PER_ROW_ALIGNMENT)) -} - -pub(crate) fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 { - height * align_byte_size(width * pixel_size) -} - -pub(crate) fn layout_data(width: u32, height: u32, format: TextureFormat) -> ImageDataLayout { - ImageDataLayout { - bytes_per_row: if height > 1 { - // 1 = 1 row - Some(get_aligned_size(width, 1, format.pixel_size() as u32)) - } else { - None - }, - rows_per_image: None, - ..Default::default() - } -} - #[derive(Resource)] pub struct ScreenshotToScreenPipeline { pub bind_group_layout: BindGroupLayout, @@ -619,7 +598,7 @@ fn render_screenshot( prepared_state.texture.as_image_copy(), wgpu::ImageCopyBuffer { buffer: &prepared_state.buffer, - layout: layout_data(width, height, texture_format), + layout: gpu_readback::layout_data(width, height, texture_format), }, Extent3d { width, @@ -687,7 +666,8 @@ pub(crate) fn collect_screenshots(world: &mut World) { // Our buffer has been padded because we needed to align to a multiple of 256. // We remove this padding here let initial_row_bytes = width as usize * pixel_size; - let buffered_row_bytes = align_byte_size(width * pixel_size as u32) as usize; + let buffered_row_bytes = + gpu_readback::align_byte_size(width * pixel_size as u32) as usize; let mut take_offset = buffered_row_bytes; let mut place_offset = initial_row_bytes; diff --git a/examples/shader/gpu_readback.rs b/examples/shader/gpu_readback.rs index ce11f6471f..d8ee657706 100644 --- a/examples/shader/gpu_readback.rs +++ b/examples/shader/gpu_readback.rs @@ -1,22 +1,24 @@ -//! A very simple compute shader that updates a gpu buffer. -//! That buffer is then copied to the cpu and sent to the main world. -//! -//! This example is not meant to teach compute shaders. -//! It is only meant to explain how to read a gpu buffer on the cpu and then use it in the main world. -//! -//! The code is based on this wgpu example: -//! +//! Simple example demonstrating the use of the [`Readback`] component to read back data from the GPU +//! using both a storage buffer and texture. use bevy::{ prelude::*, render::{ - render_graph::{self, RenderGraph, RenderLabel}, - render_resource::{binding_types::storage_buffer, *}, - renderer::{RenderContext, RenderDevice, RenderQueue}, + extract_resource::{ExtractResource, ExtractResourcePlugin}, + gpu_readback::{Readback, ReadbackComplete}, + render_asset::{RenderAssetUsages, RenderAssets}, + render_graph, + render_graph::{RenderGraph, RenderLabel}, + render_resource::{ + binding_types::{storage_buffer, texture_storage_2d}, + *, + }, + renderer::{RenderContext, RenderDevice}, + storage::{GpuShaderStorageBuffer, ShaderStorageBuffer}, + texture::GpuImage, Render, RenderApp, RenderSet, }, }; -use crossbeam_channel::{Receiver, Sender}; /// This example uses a shader source file from the assets subdirectory const SHADER_ASSET_PATH: &str = "shaders/gpu_readback.wgsl"; @@ -24,66 +26,33 @@ const SHADER_ASSET_PATH: &str = "shaders/gpu_readback.wgsl"; // The length of the buffer sent to the gpu const BUFFER_LEN: usize = 16; -// To communicate between the main world and the render world we need a channel. -// Since the main world and render world run in parallel, there will always be a frame of latency -// between the data sent from the render world and the data received in the main world -// -// frame n => render world sends data through the channel at the end of the frame -// frame n + 1 => main world receives the data - -/// This will receive asynchronously any data sent from the render world -#[derive(Resource, Deref)] -struct MainWorldReceiver(Receiver>); - -/// This will send asynchronously any data to the main world -#[derive(Resource, Deref)] -struct RenderWorldSender(Sender>); - fn main() { App::new() + .add_plugins(( + DefaultPlugins, + GpuReadbackPlugin, + ExtractResourcePlugin::::default(), + ExtractResourcePlugin::::default(), + )) .insert_resource(ClearColor(Color::BLACK)) - .add_plugins((DefaultPlugins, GpuReadbackPlugin)) - .add_systems(Update, receive) + .add_systems(Startup, setup) .run(); } -/// This system will poll the channel and try to get the data sent from the render world -fn receive(receiver: Res) { - // We don't want to block the main world on this, - // so we use try_recv which attempts to receive without blocking - if let Ok(data) = receiver.try_recv() { - println!("Received data from render world: {data:?}"); - } -} - // We need a plugin to organize all the systems and render node required for this example struct GpuReadbackPlugin; impl Plugin for GpuReadbackPlugin { fn build(&self, _app: &mut App) {} - // The render device is only accessible inside finish(). - // So we need to initialize render resources here. fn finish(&self, app: &mut App) { - let (s, r) = crossbeam_channel::unbounded(); - app.insert_resource(MainWorldReceiver(r)); - let render_app = app.sub_app_mut(RenderApp); - render_app - .insert_resource(RenderWorldSender(s)) - .init_resource::() - .init_resource::() - .add_systems( - Render, - ( - prepare_bind_group - .in_set(RenderSet::PrepareBindGroups) - // We don't need to recreate the bind group every frame - .run_if(not(resource_exists::)), - // We need to run it after the render graph is done - // because this needs to happen after submit() - map_and_read_buffer.after(RenderSet::Render), - ), - ); + render_app.init_resource::().add_systems( + Render, + prepare_bind_group + .in_set(RenderSet::PrepareBindGroups) + // We don't need to recreate the bind group every frame + .run_if(not(resource_exists::)), + ); // Add the compute node as a top level node to the render graph // This means it will only execute once per frame @@ -94,51 +63,68 @@ impl Plugin for GpuReadbackPlugin { } } -/// Holds the buffers that will be used to communicate between the cpu and gpu -#[derive(Resource)] -struct Buffers { - /// The buffer that will be used by the compute shader - /// - /// In this example, we want to write a `Vec` to a `Buffer`. `BufferVec` is a wrapper around a `Buffer` - /// that will make sure the data is correctly aligned for the gpu and will simplify uploading the data to the gpu. - gpu_buffer: BufferVec, - /// The buffer that will be read on the cpu. - /// The `gpu_buffer` will be copied to this buffer every frame - cpu_buffer: Buffer, -} +#[derive(Resource, ExtractResource, Clone)] +struct ReadbackBuffer(Handle); -impl FromWorld for Buffers { - fn from_world(world: &mut World) -> Self { - let render_device = world.resource::(); - let render_queue = world.resource::(); +#[derive(Resource, ExtractResource, Clone)] +struct ReadbackImage(Handle); - // Create the buffer that will be accessed by the gpu - let mut gpu_buffer = BufferVec::new(BufferUsages::STORAGE | BufferUsages::COPY_SRC); - for _ in 0..BUFFER_LEN { - // Init the buffer with zeroes - gpu_buffer.push(0); - } - // Write the buffer so the data is accessible on the gpu - gpu_buffer.write_buffer(render_device, render_queue); +fn setup( + mut commands: Commands, + mut images: ResMut>, + mut buffers: ResMut>, +) { + // Create a storage buffer with some data + let buffer = vec![0u32; BUFFER_LEN]; + let mut buffer = ShaderStorageBuffer::from(buffer); + // We need to enable the COPY_SRC usage so we can copy the buffer to the cpu + buffer.buffer_description.usage |= BufferUsages::COPY_SRC; + let buffer = buffers.add(buffer); - // For portability reasons, WebGPU draws a distinction between memory that is - // accessible by the CPU and memory that is accessible by the GPU. Only - // buffers accessible by the CPU can be mapped and accessed by the CPU and - // only buffers visible to the GPU can be used in shaders. In order to get - // data from the GPU, we need to use `CommandEncoder::copy_buffer_to_buffer` to - // copy the buffer modified by the GPU into a mappable, CPU-accessible buffer - let cpu_buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("readback_buffer"), - size: (BUFFER_LEN * size_of::()) as u64, - usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST, - mapped_at_creation: false, - }); + // Create a storage texture with some data + let size = Extent3d { + width: BUFFER_LEN as u32, + height: 1, + ..default() + }; + let mut image = Image::new_fill( + size, + TextureDimension::D2, + &[0, 0, 0, 0], + TextureFormat::R32Uint, + RenderAssetUsages::RENDER_WORLD, + ); + // We also need to enable the COPY_SRC, as well as STORAGE_BINDING so we can use it in the + // compute shader + image.texture_descriptor.usage |= TextureUsages::COPY_SRC | TextureUsages::STORAGE_BINDING; + let image = images.add(image); - Self { - gpu_buffer, - cpu_buffer, - } - } + // Spawn the readback components. For each frame, the data will be read back from the GPU + // asynchronously and trigger the `ReadbackComplete` event on this entity. Despawn the entity + // to stop reading back the data. + commands.spawn(Readback::buffer(buffer.clone())).observe( + |trigger: Trigger| { + // This matches the type which was used to create the `ShaderStorageBuffer` above, + // and is a convenient way to interpret the data. + let data: Vec = trigger.event().to_shader_type(); + info!("Buffer {:?}", data); + }, + ); + // This is just a simple way to pass the buffer handle to the render app for our compute node + commands.insert_resource(ReadbackBuffer(buffer)); + + // Textures can also be read back from the GPU. Pay careful attention to the format of the + // texture, as it will affect how the data is interpreted. + commands.spawn(Readback::texture(image.clone())).observe( + |trigger: Trigger| { + // You probably want to interpret the data as a color rather than a `ShaderType`, + // but in this case we know the data is a single channel storage texture, so we can + // interpret it as a `Vec` + let data: Vec = trigger.event().to_shader_type(); + info!("Image {:?}", data); + }, + ); + commands.insert_resource(ReadbackImage(image)); } #[derive(Resource)] @@ -148,18 +134,20 @@ fn prepare_bind_group( mut commands: Commands, pipeline: Res, render_device: Res, - buffers: Res, + buffer: Res, + image: Res, + buffers: Res>, + images: Res>, ) { + let buffer = buffers.get(&buffer.0).unwrap(); + let image = images.get(&image.0).unwrap(); let bind_group = render_device.create_bind_group( None, &pipeline.layout, - &BindGroupEntries::single( - buffers - .gpu_buffer - .binding() - // We already did it when creating the buffer so this should never happen - .expect("Buffer should have already been uploaded to the gpu"), - ), + &BindGroupEntries::sequential(( + buffer.buffer.as_entire_buffer_binding(), + image.texture_view.into_binding(), + )), ); commands.insert_resource(GpuBufferBindGroup(bind_group)); } @@ -175,9 +163,12 @@ impl FromWorld for ComputePipeline { let render_device = world.resource::(); let layout = render_device.create_bind_group_layout( None, - &BindGroupLayoutEntries::single( + &BindGroupLayoutEntries::sequential( ShaderStages::COMPUTE, - storage_buffer::>(false), + ( + storage_buffer::>(false), + texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::WriteOnly), + ), ), ); let shader = world.load_asset(SHADER_ASSET_PATH); @@ -194,76 +185,6 @@ impl FromWorld for ComputePipeline { } } -fn map_and_read_buffer( - render_device: Res, - buffers: Res, - sender: Res, -) { - // Finally time to get our data back from the gpu. - // First we get a buffer slice which represents a chunk of the buffer (which we - // can't access yet). - // We want the whole thing so use unbounded range. - let buffer_slice = buffers.cpu_buffer.slice(..); - - // Now things get complicated. WebGPU, for safety reasons, only allows either the GPU - // or CPU to access a buffer's contents at a time. We need to "map" the buffer which means - // flipping ownership of the buffer over to the CPU and making access legal. We do this - // with `BufferSlice::map_async`. - // - // The problem is that map_async is not an async function so we can't await it. What - // we need to do instead is pass in a closure that will be executed when the slice is - // either mapped or the mapping has failed. - // - // The problem with this is that we don't have a reliable way to wait in the main - // code for the buffer to be mapped and even worse, calling get_mapped_range or - // get_mapped_range_mut prematurely will cause a panic, not return an error. - // - // Using channels solves this as awaiting the receiving of a message from - // the passed closure will force the outside code to wait. It also doesn't hurt - // if the closure finishes before the outside code catches up as the message is - // buffered and receiving will just pick that up. - // - // It may also be worth noting that although on native, the usage of asynchronous - // channels is wholly unnecessary, for the sake of portability to Wasm - // we'll use async channels that work on both native and Wasm. - - let (s, r) = crossbeam_channel::unbounded::<()>(); - - // Maps the buffer so it can be read on the cpu - buffer_slice.map_async(MapMode::Read, move |r| match r { - // This will execute once the gpu is ready, so after the call to poll() - Ok(_) => s.send(()).expect("Failed to send map update"), - Err(err) => panic!("Failed to map buffer {err}"), - }); - - // In order for the mapping to be completed, one of three things must happen. - // One of those can be calling `Device::poll`. This isn't necessary on the web as devices - // are polled automatically but natively, we need to make sure this happens manually. - // `Maintain::Wait` will cause the thread to wait on native but not on WebGpu. - - // This blocks until the gpu is done executing everything - render_device.poll(Maintain::wait()).panic_on_timeout(); - - // This blocks until the buffer is mapped - r.recv().expect("Failed to receive the map_async message"); - - { - let buffer_view = buffer_slice.get_mapped_range(); - let data = buffer_view - .chunks(size_of::()) - .map(|chunk| u32::from_ne_bytes(chunk.try_into().expect("should be a u32"))) - .collect::>(); - sender - .send(data) - .expect("Failed to send data to main world"); - } - - // We need to make sure all `BufferView`'s are dropped before we do what we're about - // to do. - // Unmap so that we can copy to the staging buffer in the next iteration. - buffers.cpu_buffer.unmap(); -} - /// Label to identify the node in the render graph #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] struct ComputeNodeLabel; @@ -295,20 +216,6 @@ impl render_graph::Node for ComputeNode { pass.set_pipeline(init_pipeline); pass.dispatch_workgroups(BUFFER_LEN as u32, 1, 1); } - - // Copy the gpu accessible buffer to the cpu accessible buffer - let buffers = world.resource::(); - render_context.command_encoder().copy_buffer_to_buffer( - buffers - .gpu_buffer - .buffer() - .expect("Buffer should have already been uploaded to the gpu"), - 0, - &buffers.cpu_buffer, - 0, - (BUFFER_LEN * size_of::()) as u64, - ); - Ok(()) } }