2021-04-01 17:46:43 +00:00
|
|
|
//! Global `Arc`-based object interning infrastructure.
|
|
|
|
//!
|
|
|
|
//! Eventually this should probably be replaced with salsa-based interning.
|
|
|
|
|
|
|
|
use std::{
|
2021-05-19 13:17:57 +00:00
|
|
|
fmt::{self, Debug, Display},
|
2021-04-05 15:07:53 +00:00
|
|
|
hash::{BuildHasherDefault, Hash, Hasher},
|
2021-04-01 17:46:43 +00:00
|
|
|
ops::Deref,
|
2023-09-01 15:30:59 +00:00
|
|
|
sync::OnceLock,
|
2021-04-01 17:46:43 +00:00
|
|
|
};
|
|
|
|
|
2022-02-09 07:19:57 +00:00
|
|
|
use dashmap::{DashMap, SharedValue};
|
2023-05-24 10:54:25 +00:00
|
|
|
use hashbrown::{hash_map::RawEntryMut, HashMap};
|
2021-04-01 17:46:43 +00:00
|
|
|
use rustc_hash::FxHasher;
|
2023-05-02 14:12:22 +00:00
|
|
|
use triomphe::Arc;
|
2021-04-01 17:46:43 +00:00
|
|
|
|
|
|
|
/// Map type backing an `InternStorage`: keys are the shared `Arc<T>` allocations, values are the
/// unit type, and hashing uses `FxHasher`.
type InternMap<T> = DashMap<Arc<T>, (), BuildHasherDefault<FxHasher>>;
|
2022-06-10 13:59:46 +00:00
|
|
|
type Guard<T> = dashmap::RwLockWriteGuard<
|
2022-02-09 07:19:57 +00:00
|
|
|
'static,
|
|
|
|
HashMap<Arc<T>, SharedValue<()>, BuildHasherDefault<FxHasher>>,
|
|
|
|
>;
|
2021-04-01 17:46:43 +00:00
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
pub struct Interned<T: Internable + ?Sized> {
|
2021-04-01 17:46:43 +00:00
|
|
|
arc: Arc<T>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T: Internable> Interned<T> {
|
|
|
|
pub fn new(obj: T) -> Self {
|
2023-05-24 10:54:25 +00:00
|
|
|
let (mut shard, hash) = Self::select(&obj);
|
|
|
|
// Atomically,
|
|
|
|
// - check if `obj` is already in the map
|
|
|
|
// - if so, clone its `Arc` and return it
|
|
|
|
// - if not, box it up, insert it, and return a clone
|
|
|
|
// This needs to be atomic (locking the shard) to avoid races with other thread, which could
|
|
|
|
// insert the same object between us looking it up and inserting it.
|
2023-09-15 07:43:21 +00:00
|
|
|
match shard.raw_entry_mut().from_key_hashed_nocheck(hash, &obj) {
|
2023-05-24 10:54:25 +00:00
|
|
|
RawEntryMut::Occupied(occ) => Self { arc: occ.key().clone() },
|
|
|
|
RawEntryMut::Vacant(vac) => Self {
|
2023-09-15 07:43:21 +00:00
|
|
|
arc: vac.insert_hashed_nocheck(hash, Arc::new(obj), SharedValue::new(())).0.clone(),
|
2023-05-24 10:54:25 +00:00
|
|
|
},
|
2021-04-02 23:00:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-24 10:54:25 +00:00
|
|
|
impl Interned<str> {
|
|
|
|
pub fn new_str(s: &str) -> Self {
|
|
|
|
let (mut shard, hash) = Self::select(s);
|
2021-04-01 17:46:43 +00:00
|
|
|
// Atomically,
|
|
|
|
// - check if `obj` is already in the map
|
|
|
|
// - if so, clone its `Arc` and return it
|
|
|
|
// - if not, box it up, insert it, and return a clone
|
|
|
|
// This needs to be atomic (locking the shard) to avoid races with other thread, which could
|
|
|
|
// insert the same object between us looking it up and inserting it.
|
2023-09-15 07:43:21 +00:00
|
|
|
match shard.raw_entry_mut().from_key_hashed_nocheck(hash, s) {
|
2023-05-24 10:54:25 +00:00
|
|
|
RawEntryMut::Occupied(occ) => Self { arc: occ.key().clone() },
|
|
|
|
RawEntryMut::Vacant(vac) => Self {
|
2023-09-15 07:43:21 +00:00
|
|
|
arc: vac.insert_hashed_nocheck(hash, Arc::from(s), SharedValue::new(())).0.clone(),
|
2023-05-24 10:54:25 +00:00
|
|
|
},
|
2021-04-01 17:46:43 +00:00
|
|
|
}
|
2021-04-02 23:00:45 +00:00
|
|
|
}
|
2021-04-01 17:46:43 +00:00
|
|
|
}
|
|
|
|
|
2023-05-24 10:54:25 +00:00
|
|
|
impl<T: Internable + ?Sized> Interned<T> {
|
|
|
|
#[inline]
|
|
|
|
fn select(obj: &T) -> (Guard<T>, u64) {
|
|
|
|
let storage = T::storage().get();
|
|
|
|
let hash = {
|
|
|
|
let mut hasher = std::hash::BuildHasher::build_hasher(storage.hasher());
|
|
|
|
obj.hash(&mut hasher);
|
|
|
|
hasher.finish()
|
|
|
|
};
|
|
|
|
let shard_idx = storage.determine_shard(hash as usize);
|
|
|
|
let shard = &storage.shards()[shard_idx];
|
|
|
|
(shard.write(), hash)
|
2021-04-02 23:00:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
impl<T: Internable + ?Sized> Drop for Interned<T> {
|
2021-04-02 16:11:08 +00:00
|
|
|
#[inline]
|
2021-04-01 17:46:43 +00:00
|
|
|
fn drop(&mut self) {
|
|
|
|
// When the last `Ref` is dropped, remove the object from the global map.
|
2023-05-02 14:12:22 +00:00
|
|
|
if Arc::count(&self.arc) == 2 {
|
2021-04-01 17:46:43 +00:00
|
|
|
// Only `self` and the global map point to the object.
|
|
|
|
|
2021-04-02 16:11:08 +00:00
|
|
|
self.drop_slow();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T: Internable + ?Sized> Interned<T> {
|
|
|
|
#[cold]
|
|
|
|
fn drop_slow(&mut self) {
|
2023-05-24 10:54:25 +00:00
|
|
|
let (mut shard, hash) = Self::select(&self.arc);
|
2021-04-01 17:46:43 +00:00
|
|
|
|
2023-05-24 10:54:25 +00:00
|
|
|
if Arc::count(&self.arc) != 2 {
|
2021-04-02 16:11:08 +00:00
|
|
|
// Another thread has interned another copy
|
|
|
|
return;
|
|
|
|
}
|
2021-04-01 17:46:43 +00:00
|
|
|
|
2023-05-24 10:54:25 +00:00
|
|
|
match shard.raw_entry_mut().from_key_hashed_nocheck(hash, &self.arc) {
|
|
|
|
RawEntryMut::Occupied(occ) => occ.remove(),
|
|
|
|
RawEntryMut::Vacant(_) => unreachable!(),
|
|
|
|
};
|
2021-04-01 17:46:43 +00:00
|
|
|
|
2021-04-02 16:11:08 +00:00
|
|
|
// Shrink the backing storage if the shard is less than 50% occupied.
|
|
|
|
if shard.len() * 2 < shard.capacity() {
|
|
|
|
shard.shrink_to_fit();
|
2021-04-01 17:46:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Compares interned `Ref`s using pointer equality.
|
2021-04-02 16:26:34 +00:00
|
|
|
impl<T: Internable> PartialEq for Interned<T> {
|
|
|
|
// NOTE: No `?Sized` because `ptr_eq` doesn't work right with trait objects.
|
|
|
|
|
2021-04-01 17:46:43 +00:00
|
|
|
#[inline]
|
|
|
|
fn eq(&self, other: &Self) -> bool {
|
|
|
|
Arc::ptr_eq(&self.arc, &other.arc)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-02 16:26:34 +00:00
|
|
|
// Marker impl: the `PartialEq` impl for sized `T` compares `Arc` pointers via `Arc::ptr_eq`.
impl<T: Internable> Eq for Interned<T> {}
|
2021-04-01 17:46:43 +00:00
|
|
|
|
2021-04-02 23:00:45 +00:00
|
|
|
impl PartialEq for Interned<str> {
|
|
|
|
fn eq(&self, other: &Self) -> bool {
|
|
|
|
Arc::ptr_eq(&self.arc, &other.arc)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Marker impl: the `PartialEq` impl for `Interned<str>` compares `Arc` pointers via `Arc::ptr_eq`.
impl Eq for Interned<str> {}
|
|
|
|
|
2021-04-05 15:07:53 +00:00
|
|
|
impl<T: Internable + ?Sized> Hash for Interned<T> {
|
|
|
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
|
|
// NOTE: Cast disposes vtable pointer / slice/str length.
|
|
|
|
state.write_usize(Arc::as_ptr(&self.arc) as *const () as usize)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
/// Borrows the interned value.
impl<T: Internable + ?Sized> AsRef<T> for Interned<T> {
    #[inline]
    fn as_ref(&self) -> &T {
        // Deref coercion (`&Interned<T>` -> `&T`) yields the value stored in the `Arc`.
        self
    }
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
/// Dereferences to the interned value stored in the shared allocation.
impl<T: Internable + ?Sized> Deref for Interned<T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &Self::Target {
        // Forward to the `Arc`'s own `Deref` impl.
        self.arc.deref()
    }
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
impl<T: Internable + ?Sized> Clone for Interned<T> {
|
2021-04-01 17:46:43 +00:00
|
|
|
fn clone(&self) -> Self {
|
|
|
|
Self { arc: self.arc.clone() }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
impl<T: Debug + Internable + ?Sized> Debug for Interned<T> {
|
2021-04-01 17:46:43 +00:00
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
|
(*self.arc).fmt(f)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-19 13:17:57 +00:00
|
|
|
impl<T: Display + Internable + ?Sized> Display for Interned<T> {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
|
(*self.arc).fmt(f)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
pub struct InternStorage<T: ?Sized> {
|
2023-09-01 15:30:59 +00:00
|
|
|
map: OnceLock<InternMap<T>>,
|
2021-04-01 17:46:43 +00:00
|
|
|
}
|
|
|
|
|
2024-03-22 10:38:18 +00:00
|
|
|
#[allow(clippy::new_without_default)] // this a const fn, so it can't be default
|
2021-04-01 20:24:40 +00:00
|
|
|
impl<T: ?Sized> InternStorage<T> {
|
2021-04-01 17:46:43 +00:00
|
|
|
pub const fn new() -> Self {
|
2023-09-01 15:30:59 +00:00
|
|
|
Self { map: OnceLock::new() }
|
2021-04-01 17:46:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
impl<T: Internable + ?Sized> InternStorage<T> {
|
2021-04-01 17:46:43 +00:00
|
|
|
fn get(&self) -> &InternMap<T> {
|
|
|
|
self.map.get_or_init(DashMap::default)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-01 20:24:40 +00:00
|
|
|
pub trait Internable: Hash + Eq + 'static {
|
2021-04-01 17:46:43 +00:00
|
|
|
fn storage() -> &'static InternStorage<Self>;
|
|
|
|
}
|
|
|
|
|
2021-04-05 14:59:03 +00:00
|
|
|
/// Implements `Internable` for each type in a comma-separated list (trailing comma allowed),
/// which makes those types usable with `Interned`.
///
/// Every listed type gets its own function-local `static` `InternStorage`, and the generated
/// `storage()` returns a reference to it.
#[macro_export]
#[doc(hidden)]
macro_rules! _impl_internable {
    ( $($ty:path),+ $(,)? ) => { $(
        impl $crate::Internable for $ty {
            fn storage() -> &'static $crate::InternStorage<Self> {
                static STORAGE: $crate::InternStorage<$ty> = $crate::InternStorage::new();
                &STORAGE
            }
        }
    )+ };
}
|
|
|
|
|
2021-04-05 14:59:03 +00:00
|
|
|
// Re-export the `#[doc(hidden)]` `_impl_internable` macro under the name `impl_internable`.
pub use crate::_impl_internable as impl_internable;
|
|
|
|
|
2023-01-09 18:29:28 +00:00
|
|
|
// Provide `Internable` for `str`, which `Interned<str>` (see `Interned::new_str`) requires.
impl_internable!(str,);
|