Use 3 bits of PipelineKey to store MSAA sample count (#5826)

The MSAA sample count is always a power of two, so it is enough to store `log2(sample_count)`.
This can be computed with [u32::trailing_zeros](https://doc.rust-lang.org/stable/std/primitive.u32.html#method.trailing_zeros). The sample count is then restored with `1 << stored`.
This uses 3 bits instead of 6 and supports up to 128x MSAA, which is more than any common hardware supports.

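Full table of possible inputs (rows marked with `*` do not round-trip: the input is zero or greater than 128, so the stored 3-bit value decodes to a different sample count):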

```
    original MSAA sample count      stored    loaded
* 00000000000000000000000000000000 -> 000 -> 00000001  1
  00000000000000000000000000000001 -> 000 -> 00000001  1
  00000000000000000000000000000010 -> 001 -> 00000010  2
  00000000000000000000000000000100 -> 010 -> 00000100  4
  00000000000000000000000000001000 -> 011 -> 00001000  8
  00000000000000000000000000010000 -> 100 -> 00010000  16
  00000000000000000000000000100000 -> 101 -> 00100000  32
  00000000000000000000000001000000 -> 110 -> 01000000  64
  00000000000000000000000010000000 -> 111 -> 10000000  128
* 00000000000000000000000100000000 -> 000 -> 00000001  256
* 00000000000000000000001000000000 -> 001 -> 00000010  512
* 00000000000000000000010000000000 -> 010 -> 00000100  1024
* 00000000000000000000100000000000 -> 011 -> 00001000  2048
* 00000000000000000001000000000000 -> 100 -> 00010000  4096
* 00000000000000000010000000000000 -> 101 -> 00100000  8192
* 00000000000000000100000000000000 -> 110 -> 01000000  16384
* 00000000000000001000000000000000 -> 111 -> 10000000  32768
* 00000000000000010000000000000000 -> 000 -> 00000001  65536
* 00000000000000100000000000000000 -> 001 -> 00000010  131072
* 00000000000001000000000000000000 -> 010 -> 00000100  262144
* 00000000000010000000000000000000 -> 011 -> 00001000  524288
* 00000000000100000000000000000000 -> 100 -> 00010000  1048576
* 00000000001000000000000000000000 -> 101 -> 00100000  2097152
* 00000000010000000000000000000000 -> 110 -> 01000000  4194304
* 00000000100000000000000000000000 -> 111 -> 10000000  8388608
* 00000001000000000000000000000000 -> 000 -> 00000001  16777216
* 00000010000000000000000000000000 -> 001 -> 00000010  33554432
* 00000100000000000000000000000000 -> 010 -> 00000100  67108864
* 00001000000000000000000000000000 -> 011 -> 00001000  134217728
* 00010000000000000000000000000000 -> 100 -> 00010000  268435456
* 00100000000000000000000000000000 -> 101 -> 00100000  536870912
* 01000000000000000000000000000000 -> 110 -> 01000000  1073741824
* 10000000000000000000000000000000 -> 111 -> 10000000  2147483648
```
This commit is contained in:
Lain-dono 2022-08-30 03:00:39 +00:00
parent 9dd5b5354f
commit 24e5e10cd4
3 changed files with 29 additions and 26 deletions

View file

@ -500,35 +500,36 @@ impl MeshPipeline {
bitflags::bitflags! {
#[repr(transparent)]
// NOTE: Apparently quadro drivers support up to 64x MSAA.
/// MSAA uses the highest 6 bits for the MSAA sample count - 1 to support up to 64x MSAA.
/// MSAA uses the highest 3 bits for the MSAA log2(sample count) to support up to 128x MSAA.
pub struct MeshPipelineKey: u32 {
const NONE = 0;
const TRANSPARENT_MAIN_PASS = (1 << 0);
const MSAA_RESERVED_BITS = MeshPipelineKey::MSAA_MASK_BITS << MeshPipelineKey::MSAA_SHIFT_BITS;
const PRIMITIVE_TOPOLOGY_RESERVED_BITS = MeshPipelineKey::PRIMITIVE_TOPOLOGY_MASK_BITS << MeshPipelineKey::PRIMITIVE_TOPOLOGY_SHIFT_BITS;
const MSAA_RESERVED_BITS = Self::MSAA_MASK_BITS << Self::MSAA_SHIFT_BITS;
const PRIMITIVE_TOPOLOGY_RESERVED_BITS = Self::PRIMITIVE_TOPOLOGY_MASK_BITS << Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS;
}
}
impl MeshPipelineKey {
const MSAA_MASK_BITS: u32 = 0b111111;
const MSAA_SHIFT_BITS: u32 = 32 - 6;
const MSAA_MASK_BITS: u32 = 0b111;
const MSAA_SHIFT_BITS: u32 = 32 - Self::MSAA_MASK_BITS.count_ones();
const PRIMITIVE_TOPOLOGY_MASK_BITS: u32 = 0b111;
const PRIMITIVE_TOPOLOGY_SHIFT_BITS: u32 = Self::MSAA_SHIFT_BITS - 3;
pub fn from_msaa_samples(msaa_samples: u32) -> Self {
let msaa_bits = ((msaa_samples - 1) & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS;
MeshPipelineKey::from_bits(msaa_bits).unwrap()
let msaa_bits =
(msaa_samples.trailing_zeros() & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS;
Self::from_bits(msaa_bits).unwrap()
}
pub fn msaa_samples(&self) -> u32 {
((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) + 1
1 << ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS)
}
pub fn from_primitive_topology(primitive_topology: PrimitiveTopology) -> Self {
let primitive_topology_bits = ((primitive_topology as u32)
& Self::PRIMITIVE_TOPOLOGY_MASK_BITS)
<< Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS;
MeshPipelineKey::from_bits(primitive_topology_bits).unwrap()
Self::from_bits(primitive_topology_bits).unwrap()
}
pub fn primitive_topology(&self) -> PrimitiveTopology {
@ -923,7 +924,7 @@ mod tests {
use super::MeshPipelineKey;
#[test]
fn mesh_key_msaa_samples() {
for i in 1..=64 {
for i in [1, 2, 4, 8, 16, 32, 64, 128] {
assert_eq!(MeshPipelineKey::from_msaa_samples(i).msaa_samples(), i);
}
}

View file

@ -269,35 +269,36 @@ impl Mesh2dPipeline {
bitflags::bitflags! {
#[repr(transparent)]
// NOTE: Apparently quadro drivers support up to 64x MSAA.
// MSAA uses the highest 6 bits for the MSAA sample count - 1 to support up to 64x MSAA.
// MSAA uses the highest 3 bits for the MSAA log2(sample count) to support up to 128x MSAA.
// FIXME: make normals optional?
pub struct Mesh2dPipelineKey: u32 {
const NONE = 0;
const MSAA_RESERVED_BITS = Mesh2dPipelineKey::MSAA_MASK_BITS << Mesh2dPipelineKey::MSAA_SHIFT_BITS;
const PRIMITIVE_TOPOLOGY_RESERVED_BITS = Mesh2dPipelineKey::PRIMITIVE_TOPOLOGY_MASK_BITS << Mesh2dPipelineKey::PRIMITIVE_TOPOLOGY_SHIFT_BITS;
const MSAA_RESERVED_BITS = Self::MSAA_MASK_BITS << Self::MSAA_SHIFT_BITS;
const PRIMITIVE_TOPOLOGY_RESERVED_BITS = Self::PRIMITIVE_TOPOLOGY_MASK_BITS << Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS;
}
}
impl Mesh2dPipelineKey {
const MSAA_MASK_BITS: u32 = 0b111111;
const MSAA_SHIFT_BITS: u32 = 32 - 6;
const MSAA_MASK_BITS: u32 = 0b111;
const MSAA_SHIFT_BITS: u32 = 32 - Self::MSAA_MASK_BITS.count_ones();
const PRIMITIVE_TOPOLOGY_MASK_BITS: u32 = 0b111;
const PRIMITIVE_TOPOLOGY_SHIFT_BITS: u32 = Self::MSAA_SHIFT_BITS - 3;
pub fn from_msaa_samples(msaa_samples: u32) -> Self {
let msaa_bits = ((msaa_samples - 1) & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS;
Mesh2dPipelineKey::from_bits(msaa_bits).unwrap()
let msaa_bits =
(msaa_samples.trailing_zeros() & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS;
Self::from_bits(msaa_bits).unwrap()
}
pub fn msaa_samples(&self) -> u32 {
((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) + 1
1 << ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS)
}
pub fn from_primitive_topology(primitive_topology: PrimitiveTopology) -> Self {
let primitive_topology_bits = ((primitive_topology as u32)
& Self::PRIMITIVE_TOPOLOGY_MASK_BITS)
<< Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS;
Mesh2dPipelineKey::from_bits(primitive_topology_bits).unwrap()
Self::from_bits(primitive_topology_bits).unwrap()
}
pub fn primitive_topology(&self) -> PrimitiveTopology {

View file

@ -90,25 +90,26 @@ impl FromWorld for SpritePipeline {
bitflags::bitflags! {
#[repr(transparent)]
// NOTE: Apparently quadro drivers support up to 64x MSAA.
// MSAA uses the highest 6 bits for the MSAA sample count - 1 to support up to 64x MSAA.
// MSAA uses the highest 3 bits for the MSAA log2(sample count) to support up to 128x MSAA.
pub struct SpritePipelineKey: u32 {
const NONE = 0;
const COLORED = (1 << 0);
const MSAA_RESERVED_BITS = SpritePipelineKey::MSAA_MASK_BITS << SpritePipelineKey::MSAA_SHIFT_BITS;
const MSAA_RESERVED_BITS = Self::MSAA_MASK_BITS << Self::MSAA_SHIFT_BITS;
}
}
impl SpritePipelineKey {
const MSAA_MASK_BITS: u32 = 0b111111;
const MSAA_SHIFT_BITS: u32 = 32 - 6;
const MSAA_MASK_BITS: u32 = 0b111;
const MSAA_SHIFT_BITS: u32 = 32 - Self::MSAA_MASK_BITS.count_ones();
pub fn from_msaa_samples(msaa_samples: u32) -> Self {
let msaa_bits = ((msaa_samples - 1) & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS;
SpritePipelineKey::from_bits(msaa_bits).unwrap()
let msaa_bits =
(msaa_samples.trailing_zeros() & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS;
Self::from_bits(msaa_bits).unwrap()
}
pub fn msaa_samples(&self) -> u32 {
((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) + 1
1 << ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS)
}
}