mirror of
https://github.com/bevyengine/bevy
synced 2024-11-21 20:23:28 +00:00
Gpu readback (#15419)
# Objective Adds a new `Readback` component to request for readback of a `Handle<Image>` or `Handle<ShaderStorageBuffer>` to the CPU in a future frame. ## Solution We track the `Readback` component and allocate a target buffer to write the gpu resource into and map it back asynchronously, which then fires a trigger on the entity in the main world. This proccess is asynchronous, and generally takes a few frames. ## Showcase ```rust let mut buffer = ShaderStorageBuffer::from(vec![0u32; 16]); buffer.buffer_description.usage |= BufferUsages::COPY_SRC; let buffer = buffers.add(buffer); commands .spawn(Readback::buffer(buffer.clone())) .observe(|trigger: Trigger<ReadbackComplete>| { info!("Buffer data from previous frame {:?}", trigger.event()); }); ``` --------- Co-authored-by: Kristoffer Søholm <k.soeholm@gmail.com> Co-authored-by: IceSentry <IceSentry@users.noreply.github.com>
This commit is contained in:
parent
dd92a7705d
commit
40c26f80aa
6 changed files with 485 additions and 221 deletions
|
@ -3,10 +3,13 @@
|
|||
|
||||
// This is the data that lives in the gpu only buffer
|
||||
@group(0) @binding(0) var<storage, read_write> data: array<u32>;
|
||||
@group(0) @binding(1) var texture: texture_storage_2d<r32uint, write>;
|
||||
|
||||
@compute @workgroup_size(1)
|
||||
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
|
||||
// We use the global_id to index the array to make sure we don't
|
||||
// access data used in another workgroup
|
||||
data[global_id.x] += 1u;
|
||||
// Write the same data to the texture
|
||||
textureStore(texture, vec2<i32>(i32(global_id.x), 0), vec4<u32>(data[global_id.x], 0, 0, 0));
|
||||
}
|
||||
|
|
370
crates/bevy_render/src/gpu_readback.rs
Normal file
370
crates/bevy_render/src/gpu_readback.rs
Normal file
|
@ -0,0 +1,370 @@
|
|||
use crate::{
|
||||
extract_component::ExtractComponentPlugin,
|
||||
prelude::Image,
|
||||
render_asset::RenderAssets,
|
||||
render_resource::{Buffer, BufferUsages, Extent3d, ImageDataLayout, Texture, TextureFormat},
|
||||
renderer::{render_system, RenderDevice},
|
||||
storage::{GpuShaderStorageBuffer, ShaderStorageBuffer},
|
||||
texture::{GpuImage, TextureFormatPixelInfo},
|
||||
ExtractSchedule, MainWorld, Render, RenderApp, RenderSet,
|
||||
};
|
||||
use async_channel::{Receiver, Sender};
|
||||
use bevy_app::{App, Plugin};
|
||||
use bevy_asset::Handle;
|
||||
use bevy_derive::{Deref, DerefMut};
|
||||
use bevy_ecs::schedule::IntoSystemConfigs;
|
||||
use bevy_ecs::{
|
||||
change_detection::ResMut,
|
||||
entity::Entity,
|
||||
event::Event,
|
||||
prelude::{Component, Resource, World},
|
||||
system::{Query, Res},
|
||||
};
|
||||
use bevy_reflect::Reflect;
|
||||
use bevy_render_macros::ExtractComponent;
|
||||
use bevy_utils::{default, tracing::warn, HashMap};
|
||||
use encase::internal::ReadFrom;
|
||||
use encase::private::Reader;
|
||||
use encase::ShaderType;
|
||||
use wgpu::{CommandEncoder, COPY_BYTES_PER_ROW_ALIGNMENT};
|
||||
|
||||
/// A plugin that enables reading back gpu buffers and textures to the cpu.
|
||||
pub struct GpuReadbackPlugin {
|
||||
/// Describes the number of frames a buffer can be unused before it is removed from the pool in
|
||||
/// order to avoid unnecessary reallocations.
|
||||
max_unused_frames: usize,
|
||||
}
|
||||
|
||||
impl Default for GpuReadbackPlugin {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_unused_frames: 10,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Plugin for GpuReadbackPlugin {
|
||||
fn build(&self, app: &mut App) {
|
||||
app.add_plugins(ExtractComponentPlugin::<Readback>::default());
|
||||
|
||||
if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
|
||||
render_app
|
||||
.init_resource::<GpuReadbackBufferPool>()
|
||||
.init_resource::<GpuReadbacks>()
|
||||
.insert_resource(GpuReadbackMaxUnusedFrames(self.max_unused_frames))
|
||||
.add_systems(ExtractSchedule, sync_readbacks.ambiguous_with_all())
|
||||
.add_systems(
|
||||
Render,
|
||||
(
|
||||
prepare_buffers.in_set(RenderSet::PrepareResources),
|
||||
map_buffers.after(render_system).in_set(RenderSet::Render),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A component that registers the wrapped handle for gpu readback, either a texture or a buffer.
|
||||
///
|
||||
/// Data is read asynchronously and will be triggered on the entity via the [`ReadbackComplete`] event
|
||||
/// when complete. If this component is not removed, the readback will be attempted every frame
|
||||
#[derive(Component, ExtractComponent, Clone, Debug)]
|
||||
pub enum Readback {
|
||||
Texture(Handle<Image>),
|
||||
Buffer(Handle<ShaderStorageBuffer>),
|
||||
}
|
||||
|
||||
impl Readback {
|
||||
/// Create a readback component for a texture using the given handle.
|
||||
pub fn texture(image: Handle<Image>) -> Self {
|
||||
Self::Texture(image)
|
||||
}
|
||||
|
||||
/// Create a readback component for a buffer using the given handle.
|
||||
pub fn buffer(buffer: Handle<ShaderStorageBuffer>) -> Self {
|
||||
Self::Buffer(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
/// An event that is triggered when a gpu readback is complete.
|
||||
///
|
||||
/// The event contains the data as a `Vec<u8>`, which can be interpreted as the raw bytes of the
|
||||
/// requested buffer or texture.
|
||||
#[derive(Event, Deref, DerefMut, Reflect, Debug)]
|
||||
#[reflect(Debug)]
|
||||
pub struct ReadbackComplete(pub Vec<u8>);
|
||||
|
||||
impl ReadbackComplete {
|
||||
/// Convert the raw bytes of the event to a shader type.
|
||||
pub fn to_shader_type<T: ShaderType + ReadFrom + Default>(&self) -> T {
|
||||
let mut val = T::default();
|
||||
let mut reader = Reader::new::<T>(&self.0, 0).expect("Failed to create Reader");
|
||||
T::read_from(&mut val, &mut reader);
|
||||
val
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Resource)]
|
||||
struct GpuReadbackMaxUnusedFrames(usize);
|
||||
|
||||
struct GpuReadbackBuffer {
|
||||
buffer: Buffer,
|
||||
taken: bool,
|
||||
frames_unused: usize,
|
||||
}
|
||||
|
||||
#[derive(Resource, Default)]
|
||||
struct GpuReadbackBufferPool {
|
||||
// Map of buffer size to list of buffers, with a flag for whether the buffer is taken and how
|
||||
// many frames it has been unused for.
|
||||
// TODO: We could ideally write all readback data to one big buffer per frame, the assumption
|
||||
// here is that very few entities well actually be read back at once, and their size is
|
||||
// unlikely to change.
|
||||
buffers: HashMap<u64, Vec<GpuReadbackBuffer>>,
|
||||
}
|
||||
|
||||
impl GpuReadbackBufferPool {
|
||||
fn get(&mut self, render_device: &RenderDevice, size: u64) -> Buffer {
|
||||
let buffers = self.buffers.entry(size).or_default();
|
||||
|
||||
// find an untaken buffer for this size
|
||||
if let Some(buf) = buffers.iter_mut().find(|x| !x.taken) {
|
||||
buf.taken = true;
|
||||
buf.frames_unused = 0;
|
||||
return buf.buffer.clone();
|
||||
}
|
||||
|
||||
let buffer = render_device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("Readback Buffer"),
|
||||
size,
|
||||
usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
buffers.push(GpuReadbackBuffer {
|
||||
buffer: buffer.clone(),
|
||||
taken: true,
|
||||
frames_unused: 0,
|
||||
});
|
||||
buffer
|
||||
}
|
||||
|
||||
// Returns the buffer to the pool so it can be used in a future frame
|
||||
fn return_buffer(&mut self, buffer: &Buffer) {
|
||||
let size = buffer.size();
|
||||
let buffers = self
|
||||
.buffers
|
||||
.get_mut(&size)
|
||||
.expect("Returned buffer of untracked size");
|
||||
if let Some(buf) = buffers.iter_mut().find(|x| x.buffer.id() == buffer.id()) {
|
||||
buf.taken = false;
|
||||
} else {
|
||||
warn!("Returned buffer that was not allocated");
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, max_unused_frames: usize) {
|
||||
for (_, buffers) in &mut self.buffers {
|
||||
// Tick all the buffers
|
||||
for buf in &mut *buffers {
|
||||
if !buf.taken {
|
||||
buf.frames_unused += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove buffers that haven't been used for MAX_UNUSED_FRAMES
|
||||
buffers.retain(|x| x.frames_unused < max_unused_frames);
|
||||
}
|
||||
|
||||
// Remove empty buffer sizes
|
||||
self.buffers.retain(|_, buffers| !buffers.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
enum ReadbackSource {
|
||||
Texture {
|
||||
texture: Texture,
|
||||
layout: ImageDataLayout,
|
||||
size: Extent3d,
|
||||
},
|
||||
Buffer {
|
||||
src_start: u64,
|
||||
dst_start: u64,
|
||||
buffer: Buffer,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Resource, Default)]
|
||||
struct GpuReadbacks {
|
||||
requested: Vec<GpuReadback>,
|
||||
mapped: Vec<GpuReadback>,
|
||||
}
|
||||
|
||||
struct GpuReadback {
|
||||
pub entity: Entity,
|
||||
pub src: ReadbackSource,
|
||||
pub buffer: Buffer,
|
||||
pub rx: Receiver<(Entity, Buffer, Vec<u8>)>,
|
||||
pub tx: Sender<(Entity, Buffer, Vec<u8>)>,
|
||||
}
|
||||
|
||||
fn sync_readbacks(
|
||||
mut main_world: ResMut<MainWorld>,
|
||||
mut buffer_pool: ResMut<GpuReadbackBufferPool>,
|
||||
mut readbacks: ResMut<GpuReadbacks>,
|
||||
max_unused_frames: Res<GpuReadbackMaxUnusedFrames>,
|
||||
) {
|
||||
readbacks.mapped.retain(|readback| {
|
||||
if let Ok((entity, buffer, result)) = readback.rx.try_recv() {
|
||||
main_world.trigger_targets(ReadbackComplete(result), entity);
|
||||
buffer_pool.return_buffer(&buffer);
|
||||
false
|
||||
} else {
|
||||
true
|
||||
}
|
||||
});
|
||||
|
||||
buffer_pool.update(max_unused_frames.0);
|
||||
}
|
||||
|
||||
fn prepare_buffers(
|
||||
render_device: Res<RenderDevice>,
|
||||
mut readbacks: ResMut<GpuReadbacks>,
|
||||
mut buffer_pool: ResMut<GpuReadbackBufferPool>,
|
||||
gpu_images: Res<RenderAssets<GpuImage>>,
|
||||
ssbos: Res<RenderAssets<GpuShaderStorageBuffer>>,
|
||||
handles: Query<(Entity, &Readback)>,
|
||||
) {
|
||||
for (entity, readback) in handles.iter() {
|
||||
match readback {
|
||||
Readback::Texture(image) => {
|
||||
if let Some(gpu_image) = gpu_images.get(image) {
|
||||
let size = Extent3d {
|
||||
width: gpu_image.size.x,
|
||||
height: gpu_image.size.y,
|
||||
..default()
|
||||
};
|
||||
let layout = layout_data(size.width, size.height, gpu_image.texture_format);
|
||||
let buffer = buffer_pool.get(
|
||||
&render_device,
|
||||
get_aligned_size(
|
||||
size.width,
|
||||
size.height,
|
||||
gpu_image.texture_format.pixel_size() as u32,
|
||||
) as u64,
|
||||
);
|
||||
let (tx, rx) = async_channel::bounded(1);
|
||||
readbacks.requested.push(GpuReadback {
|
||||
entity,
|
||||
src: ReadbackSource::Texture {
|
||||
texture: gpu_image.texture.clone(),
|
||||
layout,
|
||||
size,
|
||||
},
|
||||
buffer,
|
||||
rx,
|
||||
tx,
|
||||
});
|
||||
}
|
||||
}
|
||||
Readback::Buffer(buffer) => {
|
||||
if let Some(ssbo) = ssbos.get(buffer) {
|
||||
let size = ssbo.buffer.size();
|
||||
let buffer = buffer_pool.get(&render_device, size);
|
||||
let (tx, rx) = async_channel::bounded(1);
|
||||
readbacks.requested.push(GpuReadback {
|
||||
entity,
|
||||
src: ReadbackSource::Buffer {
|
||||
src_start: 0,
|
||||
dst_start: 0,
|
||||
buffer: ssbo.buffer.clone(),
|
||||
},
|
||||
buffer,
|
||||
rx,
|
||||
tx,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn submit_readback_commands(world: &World, command_encoder: &mut CommandEncoder) {
|
||||
let readbacks = world.resource::<GpuReadbacks>();
|
||||
for readback in &readbacks.requested {
|
||||
match &readback.src {
|
||||
ReadbackSource::Texture {
|
||||
texture,
|
||||
layout,
|
||||
size,
|
||||
} => {
|
||||
command_encoder.copy_texture_to_buffer(
|
||||
texture.as_image_copy(),
|
||||
wgpu::ImageCopyBuffer {
|
||||
buffer: &readback.buffer,
|
||||
layout: *layout,
|
||||
},
|
||||
*size,
|
||||
);
|
||||
}
|
||||
ReadbackSource::Buffer {
|
||||
src_start,
|
||||
dst_start,
|
||||
buffer,
|
||||
} => {
|
||||
command_encoder.copy_buffer_to_buffer(
|
||||
buffer,
|
||||
*src_start,
|
||||
&readback.buffer,
|
||||
*dst_start,
|
||||
buffer.size(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Move requested readbacks to mapped readbacks after commands have been submitted in render system
|
||||
fn map_buffers(mut readbacks: ResMut<GpuReadbacks>) {
|
||||
let requested = readbacks.requested.drain(..).collect::<Vec<GpuReadback>>();
|
||||
for readback in requested {
|
||||
let slice = readback.buffer.slice(..);
|
||||
let entity = readback.entity;
|
||||
let buffer = readback.buffer.clone();
|
||||
let tx = readback.tx.clone();
|
||||
slice.map_async(wgpu::MapMode::Read, move |res| {
|
||||
res.expect("Failed to map buffer");
|
||||
let buffer_slice = buffer.slice(..);
|
||||
let data = buffer_slice.get_mapped_range();
|
||||
let result = Vec::from(&*data);
|
||||
drop(data);
|
||||
buffer.unmap();
|
||||
if let Err(e) = tx.try_send((entity, buffer, result)) {
|
||||
warn!("Failed to send readback result: {:?}", e);
|
||||
}
|
||||
});
|
||||
readbacks.mapped.push(readback);
|
||||
}
|
||||
}
|
||||
|
||||
// Utils
|
||||
|
||||
pub(crate) fn align_byte_size(value: u32) -> u32 {
|
||||
value + (COPY_BYTES_PER_ROW_ALIGNMENT - (value % COPY_BYTES_PER_ROW_ALIGNMENT))
|
||||
}
|
||||
|
||||
pub(crate) fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
|
||||
height * align_byte_size(width * pixel_size)
|
||||
}
|
||||
|
||||
pub(crate) fn layout_data(width: u32, height: u32, format: TextureFormat) -> ImageDataLayout {
|
||||
ImageDataLayout {
|
||||
bytes_per_row: if height > 1 {
|
||||
// 1 = 1 row
|
||||
Some(get_aligned_size(width, 1, format.pixel_size() as u32))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
rows_per_image: None,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
|
@ -25,6 +25,7 @@ mod extract_param;
|
|||
pub mod extract_resource;
|
||||
pub mod globals;
|
||||
pub mod gpu_component_array_buffer;
|
||||
pub mod gpu_readback;
|
||||
pub mod mesh;
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
pub mod pipelined_rendering;
|
||||
|
@ -73,6 +74,7 @@ use globals::GlobalsPlugin;
|
|||
use render_asset::RenderAssetBytesPerFrame;
|
||||
use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue};
|
||||
|
||||
use crate::gpu_readback::GpuReadbackPlugin;
|
||||
use crate::{
|
||||
camera::CameraPlugin,
|
||||
mesh::{morph::MorphPlugin, MeshPlugin, RenderMesh},
|
||||
|
@ -363,6 +365,7 @@ impl Plugin for RenderPlugin {
|
|||
MorphPlugin,
|
||||
BatchingPlugin,
|
||||
StoragePlugin,
|
||||
GpuReadbackPlugin::default(),
|
||||
));
|
||||
|
||||
app.init_resource::<RenderAssetBytesPerFrame>()
|
||||
|
|
|
@ -46,6 +46,7 @@ pub fn render_system(world: &mut World, state: &mut SystemState<Query<Entity, Wi
|
|||
world,
|
||||
|encoder| {
|
||||
crate::view::screenshot::submit_screenshot_commands(world, encoder);
|
||||
crate::gpu_readback::submit_readback_commands(world, encoder);
|
||||
},
|
||||
);
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use super::ExtractedWindows;
|
||||
use crate::{
|
||||
camera::{ManualTextureViewHandle, ManualTextureViews, NormalizedRenderTarget, RenderTarget},
|
||||
gpu_readback,
|
||||
prelude::{Image, Shader},
|
||||
render_asset::{RenderAssetUsages, RenderAssets},
|
||||
render_resource::{
|
||||
|
@ -38,9 +39,7 @@ use std::{
|
|||
Mutex,
|
||||
},
|
||||
};
|
||||
use wgpu::{
|
||||
CommandEncoder, Extent3d, ImageDataLayout, TextureFormat, COPY_BYTES_PER_ROW_ALIGNMENT,
|
||||
};
|
||||
use wgpu::{CommandEncoder, Extent3d, TextureFormat};
|
||||
|
||||
#[derive(Event, Deref, DerefMut, Reflect, Debug)]
|
||||
#[reflect(Debug)]
|
||||
|
@ -376,7 +375,8 @@ fn prepare_screenshot_state(
|
|||
let texture_view = texture.create_view(&Default::default());
|
||||
let buffer = render_device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("screenshot-transfer-buffer"),
|
||||
size: get_aligned_size(size.width, size.height, format.pixel_size() as u32) as u64,
|
||||
size: gpu_readback::get_aligned_size(size.width, size.height, format.pixel_size() as u32)
|
||||
as u64,
|
||||
usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
@ -445,27 +445,6 @@ impl Plugin for ScreenshotPlugin {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn align_byte_size(value: u32) -> u32 {
|
||||
value + (COPY_BYTES_PER_ROW_ALIGNMENT - (value % COPY_BYTES_PER_ROW_ALIGNMENT))
|
||||
}
|
||||
|
||||
pub(crate) fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
|
||||
height * align_byte_size(width * pixel_size)
|
||||
}
|
||||
|
||||
pub(crate) fn layout_data(width: u32, height: u32, format: TextureFormat) -> ImageDataLayout {
|
||||
ImageDataLayout {
|
||||
bytes_per_row: if height > 1 {
|
||||
// 1 = 1 row
|
||||
Some(get_aligned_size(width, 1, format.pixel_size() as u32))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
rows_per_image: None,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Resource)]
|
||||
pub struct ScreenshotToScreenPipeline {
|
||||
pub bind_group_layout: BindGroupLayout,
|
||||
|
@ -619,7 +598,7 @@ fn render_screenshot(
|
|||
prepared_state.texture.as_image_copy(),
|
||||
wgpu::ImageCopyBuffer {
|
||||
buffer: &prepared_state.buffer,
|
||||
layout: layout_data(width, height, texture_format),
|
||||
layout: gpu_readback::layout_data(width, height, texture_format),
|
||||
},
|
||||
Extent3d {
|
||||
width,
|
||||
|
@ -687,7 +666,8 @@ pub(crate) fn collect_screenshots(world: &mut World) {
|
|||
// Our buffer has been padded because we needed to align to a multiple of 256.
|
||||
// We remove this padding here
|
||||
let initial_row_bytes = width as usize * pixel_size;
|
||||
let buffered_row_bytes = align_byte_size(width * pixel_size as u32) as usize;
|
||||
let buffered_row_bytes =
|
||||
gpu_readback::align_byte_size(width * pixel_size as u32) as usize;
|
||||
|
||||
let mut take_offset = buffered_row_bytes;
|
||||
let mut place_offset = initial_row_bytes;
|
||||
|
|
|
@ -1,22 +1,24 @@
|
|||
//! A very simple compute shader that updates a gpu buffer.
|
||||
//! That buffer is then copied to the cpu and sent to the main world.
|
||||
//!
|
||||
//! This example is not meant to teach compute shaders.
|
||||
//! It is only meant to explain how to read a gpu buffer on the cpu and then use it in the main world.
|
||||
//!
|
||||
//! The code is based on this wgpu example:
|
||||
//! <https://github.com/gfx-rs/wgpu/blob/fb305b85f692f3fbbd9509b648dfbc97072f7465/examples/src/repeated_compute/mod.rs>
|
||||
//! Simple example demonstrating the use of the [`Readback`] component to read back data from the GPU
|
||||
//! using both a storage buffer and texture.
|
||||
|
||||
use bevy::{
|
||||
prelude::*,
|
||||
render::{
|
||||
render_graph::{self, RenderGraph, RenderLabel},
|
||||
render_resource::{binding_types::storage_buffer, *},
|
||||
renderer::{RenderContext, RenderDevice, RenderQueue},
|
||||
extract_resource::{ExtractResource, ExtractResourcePlugin},
|
||||
gpu_readback::{Readback, ReadbackComplete},
|
||||
render_asset::{RenderAssetUsages, RenderAssets},
|
||||
render_graph,
|
||||
render_graph::{RenderGraph, RenderLabel},
|
||||
render_resource::{
|
||||
binding_types::{storage_buffer, texture_storage_2d},
|
||||
*,
|
||||
},
|
||||
renderer::{RenderContext, RenderDevice},
|
||||
storage::{GpuShaderStorageBuffer, ShaderStorageBuffer},
|
||||
texture::GpuImage,
|
||||
Render, RenderApp, RenderSet,
|
||||
},
|
||||
};
|
||||
use crossbeam_channel::{Receiver, Sender};
|
||||
|
||||
/// This example uses a shader source file from the assets subdirectory
|
||||
const SHADER_ASSET_PATH: &str = "shaders/gpu_readback.wgsl";
|
||||
|
@ -24,66 +26,33 @@ const SHADER_ASSET_PATH: &str = "shaders/gpu_readback.wgsl";
|
|||
// The length of the buffer sent to the gpu
|
||||
const BUFFER_LEN: usize = 16;
|
||||
|
||||
// To communicate between the main world and the render world we need a channel.
|
||||
// Since the main world and render world run in parallel, there will always be a frame of latency
|
||||
// between the data sent from the render world and the data received in the main world
|
||||
//
|
||||
// frame n => render world sends data through the channel at the end of the frame
|
||||
// frame n + 1 => main world receives the data
|
||||
|
||||
/// This will receive asynchronously any data sent from the render world
|
||||
#[derive(Resource, Deref)]
|
||||
struct MainWorldReceiver(Receiver<Vec<u32>>);
|
||||
|
||||
/// This will send asynchronously any data to the main world
|
||||
#[derive(Resource, Deref)]
|
||||
struct RenderWorldSender(Sender<Vec<u32>>);
|
||||
|
||||
fn main() {
|
||||
App::new()
|
||||
.add_plugins((
|
||||
DefaultPlugins,
|
||||
GpuReadbackPlugin,
|
||||
ExtractResourcePlugin::<ReadbackBuffer>::default(),
|
||||
ExtractResourcePlugin::<ReadbackImage>::default(),
|
||||
))
|
||||
.insert_resource(ClearColor(Color::BLACK))
|
||||
.add_plugins((DefaultPlugins, GpuReadbackPlugin))
|
||||
.add_systems(Update, receive)
|
||||
.add_systems(Startup, setup)
|
||||
.run();
|
||||
}
|
||||
|
||||
/// This system will poll the channel and try to get the data sent from the render world
|
||||
fn receive(receiver: Res<MainWorldReceiver>) {
|
||||
// We don't want to block the main world on this,
|
||||
// so we use try_recv which attempts to receive without blocking
|
||||
if let Ok(data) = receiver.try_recv() {
|
||||
println!("Received data from render world: {data:?}");
|
||||
}
|
||||
}
|
||||
|
||||
// We need a plugin to organize all the systems and render node required for this example
|
||||
struct GpuReadbackPlugin;
|
||||
impl Plugin for GpuReadbackPlugin {
|
||||
fn build(&self, _app: &mut App) {}
|
||||
|
||||
// The render device is only accessible inside finish().
|
||||
// So we need to initialize render resources here.
|
||||
fn finish(&self, app: &mut App) {
|
||||
let (s, r) = crossbeam_channel::unbounded();
|
||||
app.insert_resource(MainWorldReceiver(r));
|
||||
|
||||
let render_app = app.sub_app_mut(RenderApp);
|
||||
render_app
|
||||
.insert_resource(RenderWorldSender(s))
|
||||
.init_resource::<ComputePipeline>()
|
||||
.init_resource::<Buffers>()
|
||||
.add_systems(
|
||||
Render,
|
||||
(
|
||||
prepare_bind_group
|
||||
.in_set(RenderSet::PrepareBindGroups)
|
||||
// We don't need to recreate the bind group every frame
|
||||
.run_if(not(resource_exists::<GpuBufferBindGroup>)),
|
||||
// We need to run it after the render graph is done
|
||||
// because this needs to happen after submit()
|
||||
map_and_read_buffer.after(RenderSet::Render),
|
||||
),
|
||||
);
|
||||
render_app.init_resource::<ComputePipeline>().add_systems(
|
||||
Render,
|
||||
prepare_bind_group
|
||||
.in_set(RenderSet::PrepareBindGroups)
|
||||
// We don't need to recreate the bind group every frame
|
||||
.run_if(not(resource_exists::<GpuBufferBindGroup>)),
|
||||
);
|
||||
|
||||
// Add the compute node as a top level node to the render graph
|
||||
// This means it will only execute once per frame
|
||||
|
@ -94,51 +63,68 @@ impl Plugin for GpuReadbackPlugin {
|
|||
}
|
||||
}
|
||||
|
||||
/// Holds the buffers that will be used to communicate between the cpu and gpu
|
||||
#[derive(Resource)]
|
||||
struct Buffers {
|
||||
/// The buffer that will be used by the compute shader
|
||||
///
|
||||
/// In this example, we want to write a `Vec<u32>` to a `Buffer`. `BufferVec` is a wrapper around a `Buffer`
|
||||
/// that will make sure the data is correctly aligned for the gpu and will simplify uploading the data to the gpu.
|
||||
gpu_buffer: BufferVec<u32>,
|
||||
/// The buffer that will be read on the cpu.
|
||||
/// The `gpu_buffer` will be copied to this buffer every frame
|
||||
cpu_buffer: Buffer,
|
||||
}
|
||||
#[derive(Resource, ExtractResource, Clone)]
|
||||
struct ReadbackBuffer(Handle<ShaderStorageBuffer>);
|
||||
|
||||
impl FromWorld for Buffers {
|
||||
fn from_world(world: &mut World) -> Self {
|
||||
let render_device = world.resource::<RenderDevice>();
|
||||
let render_queue = world.resource::<RenderQueue>();
|
||||
#[derive(Resource, ExtractResource, Clone)]
|
||||
struct ReadbackImage(Handle<Image>);
|
||||
|
||||
// Create the buffer that will be accessed by the gpu
|
||||
let mut gpu_buffer = BufferVec::new(BufferUsages::STORAGE | BufferUsages::COPY_SRC);
|
||||
for _ in 0..BUFFER_LEN {
|
||||
// Init the buffer with zeroes
|
||||
gpu_buffer.push(0);
|
||||
}
|
||||
// Write the buffer so the data is accessible on the gpu
|
||||
gpu_buffer.write_buffer(render_device, render_queue);
|
||||
fn setup(
|
||||
mut commands: Commands,
|
||||
mut images: ResMut<Assets<Image>>,
|
||||
mut buffers: ResMut<Assets<ShaderStorageBuffer>>,
|
||||
) {
|
||||
// Create a storage buffer with some data
|
||||
let buffer = vec![0u32; BUFFER_LEN];
|
||||
let mut buffer = ShaderStorageBuffer::from(buffer);
|
||||
// We need to enable the COPY_SRC usage so we can copy the buffer to the cpu
|
||||
buffer.buffer_description.usage |= BufferUsages::COPY_SRC;
|
||||
let buffer = buffers.add(buffer);
|
||||
|
||||
// For portability reasons, WebGPU draws a distinction between memory that is
|
||||
// accessible by the CPU and memory that is accessible by the GPU. Only
|
||||
// buffers accessible by the CPU can be mapped and accessed by the CPU and
|
||||
// only buffers visible to the GPU can be used in shaders. In order to get
|
||||
// data from the GPU, we need to use `CommandEncoder::copy_buffer_to_buffer` to
|
||||
// copy the buffer modified by the GPU into a mappable, CPU-accessible buffer
|
||||
let cpu_buffer = render_device.create_buffer(&BufferDescriptor {
|
||||
label: Some("readback_buffer"),
|
||||
size: (BUFFER_LEN * size_of::<u32>()) as u64,
|
||||
usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
// Create a storage texture with some data
|
||||
let size = Extent3d {
|
||||
width: BUFFER_LEN as u32,
|
||||
height: 1,
|
||||
..default()
|
||||
};
|
||||
let mut image = Image::new_fill(
|
||||
size,
|
||||
TextureDimension::D2,
|
||||
&[0, 0, 0, 0],
|
||||
TextureFormat::R32Uint,
|
||||
RenderAssetUsages::RENDER_WORLD,
|
||||
);
|
||||
// We also need to enable the COPY_SRC, as well as STORAGE_BINDING so we can use it in the
|
||||
// compute shader
|
||||
image.texture_descriptor.usage |= TextureUsages::COPY_SRC | TextureUsages::STORAGE_BINDING;
|
||||
let image = images.add(image);
|
||||
|
||||
Self {
|
||||
gpu_buffer,
|
||||
cpu_buffer,
|
||||
}
|
||||
}
|
||||
// Spawn the readback components. For each frame, the data will be read back from the GPU
|
||||
// asynchronously and trigger the `ReadbackComplete` event on this entity. Despawn the entity
|
||||
// to stop reading back the data.
|
||||
commands.spawn(Readback::buffer(buffer.clone())).observe(
|
||||
|trigger: Trigger<ReadbackComplete>| {
|
||||
// This matches the type which was used to create the `ShaderStorageBuffer` above,
|
||||
// and is a convenient way to interpret the data.
|
||||
let data: Vec<u32> = trigger.event().to_shader_type();
|
||||
info!("Buffer {:?}", data);
|
||||
},
|
||||
);
|
||||
// This is just a simple way to pass the buffer handle to the render app for our compute node
|
||||
commands.insert_resource(ReadbackBuffer(buffer));
|
||||
|
||||
// Textures can also be read back from the GPU. Pay careful attention to the format of the
|
||||
// texture, as it will affect how the data is interpreted.
|
||||
commands.spawn(Readback::texture(image.clone())).observe(
|
||||
|trigger: Trigger<ReadbackComplete>| {
|
||||
// You probably want to interpret the data as a color rather than a `ShaderType`,
|
||||
// but in this case we know the data is a single channel storage texture, so we can
|
||||
// interpret it as a `Vec<u32>`
|
||||
let data: Vec<u32> = trigger.event().to_shader_type();
|
||||
info!("Image {:?}", data);
|
||||
},
|
||||
);
|
||||
commands.insert_resource(ReadbackImage(image));
|
||||
}
|
||||
|
||||
#[derive(Resource)]
|
||||
|
@ -148,18 +134,20 @@ fn prepare_bind_group(
|
|||
mut commands: Commands,
|
||||
pipeline: Res<ComputePipeline>,
|
||||
render_device: Res<RenderDevice>,
|
||||
buffers: Res<Buffers>,
|
||||
buffer: Res<ReadbackBuffer>,
|
||||
image: Res<ReadbackImage>,
|
||||
buffers: Res<RenderAssets<GpuShaderStorageBuffer>>,
|
||||
images: Res<RenderAssets<GpuImage>>,
|
||||
) {
|
||||
let buffer = buffers.get(&buffer.0).unwrap();
|
||||
let image = images.get(&image.0).unwrap();
|
||||
let bind_group = render_device.create_bind_group(
|
||||
None,
|
||||
&pipeline.layout,
|
||||
&BindGroupEntries::single(
|
||||
buffers
|
||||
.gpu_buffer
|
||||
.binding()
|
||||
// We already did it when creating the buffer so this should never happen
|
||||
.expect("Buffer should have already been uploaded to the gpu"),
|
||||
),
|
||||
&BindGroupEntries::sequential((
|
||||
buffer.buffer.as_entire_buffer_binding(),
|
||||
image.texture_view.into_binding(),
|
||||
)),
|
||||
);
|
||||
commands.insert_resource(GpuBufferBindGroup(bind_group));
|
||||
}
|
||||
|
@ -175,9 +163,12 @@ impl FromWorld for ComputePipeline {
|
|||
let render_device = world.resource::<RenderDevice>();
|
||||
let layout = render_device.create_bind_group_layout(
|
||||
None,
|
||||
&BindGroupLayoutEntries::single(
|
||||
&BindGroupLayoutEntries::sequential(
|
||||
ShaderStages::COMPUTE,
|
||||
storage_buffer::<Vec<u32>>(false),
|
||||
(
|
||||
storage_buffer::<Vec<u32>>(false),
|
||||
texture_storage_2d(TextureFormat::R32Uint, StorageTextureAccess::WriteOnly),
|
||||
),
|
||||
),
|
||||
);
|
||||
let shader = world.load_asset(SHADER_ASSET_PATH);
|
||||
|
@ -194,76 +185,6 @@ impl FromWorld for ComputePipeline {
|
|||
}
|
||||
}
|
||||
|
||||
fn map_and_read_buffer(
|
||||
render_device: Res<RenderDevice>,
|
||||
buffers: Res<Buffers>,
|
||||
sender: Res<RenderWorldSender>,
|
||||
) {
|
||||
// Finally time to get our data back from the gpu.
|
||||
// First we get a buffer slice which represents a chunk of the buffer (which we
|
||||
// can't access yet).
|
||||
// We want the whole thing so use unbounded range.
|
||||
let buffer_slice = buffers.cpu_buffer.slice(..);
|
||||
|
||||
// Now things get complicated. WebGPU, for safety reasons, only allows either the GPU
|
||||
// or CPU to access a buffer's contents at a time. We need to "map" the buffer which means
|
||||
// flipping ownership of the buffer over to the CPU and making access legal. We do this
|
||||
// with `BufferSlice::map_async`.
|
||||
//
|
||||
// The problem is that map_async is not an async function so we can't await it. What
|
||||
// we need to do instead is pass in a closure that will be executed when the slice is
|
||||
// either mapped or the mapping has failed.
|
||||
//
|
||||
// The problem with this is that we don't have a reliable way to wait in the main
|
||||
// code for the buffer to be mapped and even worse, calling get_mapped_range or
|
||||
// get_mapped_range_mut prematurely will cause a panic, not return an error.
|
||||
//
|
||||
// Using channels solves this as awaiting the receiving of a message from
|
||||
// the passed closure will force the outside code to wait. It also doesn't hurt
|
||||
// if the closure finishes before the outside code catches up as the message is
|
||||
// buffered and receiving will just pick that up.
|
||||
//
|
||||
// It may also be worth noting that although on native, the usage of asynchronous
|
||||
// channels is wholly unnecessary, for the sake of portability to Wasm
|
||||
// we'll use async channels that work on both native and Wasm.
|
||||
|
||||
let (s, r) = crossbeam_channel::unbounded::<()>();
|
||||
|
||||
// Maps the buffer so it can be read on the cpu
|
||||
buffer_slice.map_async(MapMode::Read, move |r| match r {
|
||||
// This will execute once the gpu is ready, so after the call to poll()
|
||||
Ok(_) => s.send(()).expect("Failed to send map update"),
|
||||
Err(err) => panic!("Failed to map buffer {err}"),
|
||||
});
|
||||
|
||||
// In order for the mapping to be completed, one of three things must happen.
|
||||
// One of those can be calling `Device::poll`. This isn't necessary on the web as devices
|
||||
// are polled automatically but natively, we need to make sure this happens manually.
|
||||
// `Maintain::Wait` will cause the thread to wait on native but not on WebGpu.
|
||||
|
||||
// This blocks until the gpu is done executing everything
|
||||
render_device.poll(Maintain::wait()).panic_on_timeout();
|
||||
|
||||
// This blocks until the buffer is mapped
|
||||
r.recv().expect("Failed to receive the map_async message");
|
||||
|
||||
{
|
||||
let buffer_view = buffer_slice.get_mapped_range();
|
||||
let data = buffer_view
|
||||
.chunks(size_of::<u32>())
|
||||
.map(|chunk| u32::from_ne_bytes(chunk.try_into().expect("should be a u32")))
|
||||
.collect::<Vec<u32>>();
|
||||
sender
|
||||
.send(data)
|
||||
.expect("Failed to send data to main world");
|
||||
}
|
||||
|
||||
// We need to make sure all `BufferView`'s are dropped before we do what we're about
|
||||
// to do.
|
||||
// Unmap so that we can copy to the staging buffer in the next iteration.
|
||||
buffers.cpu_buffer.unmap();
|
||||
}
|
||||
|
||||
/// Label to identify the node in the render graph
|
||||
#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
|
||||
struct ComputeNodeLabel;
|
||||
|
@ -295,20 +216,6 @@ impl render_graph::Node for ComputeNode {
|
|||
pass.set_pipeline(init_pipeline);
|
||||
pass.dispatch_workgroups(BUFFER_LEN as u32, 1, 1);
|
||||
}
|
||||
|
||||
// Copy the gpu accessible buffer to the cpu accessible buffer
|
||||
let buffers = world.resource::<Buffers>();
|
||||
render_context.command_encoder().copy_buffer_to_buffer(
|
||||
buffers
|
||||
.gpu_buffer
|
||||
.buffer()
|
||||
.expect("Buffer should have already been uploaded to the gpu"),
|
||||
0,
|
||||
&buffers.cpu_buffer,
|
||||
0,
|
||||
(BUFFER_LEN * size_of::<u32>()) as u64,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue