mirror of
https://github.com/nushell/nushell
synced 2024-12-26 13:03:07 +00:00
Fix locking soundness in PersistentPlugin (#12182)
# Description There were two problems in `PersistentPlugin` which could cause a deadlock: 1. There were two mutexes being used, and `get()` could potentially hold both simultaneously if it had to spawn. This won't necessarily cause a deadlock on its own, but it does mean that lock order is sensitive 2. `set_gc_config()` called `flush()` while still holding the lock, meaning that the GC thread had to proceed before the lock was released. However, waiting for the GC thread to proceed could mean waiting for the GC thread to call `stop()`, which itself would try to lock the mutex. So, it's not safe to wait for the GC thread while the lock is held. This is fixed now. I've also reverted #12177, as @IanManske reported that this was also happening for him on Linux, and it seems to be this problem which should not be platform-specific at all. I believe this solves it. # User-Facing Changes None # Tests + Formatting - 🟢 `toolkit fmt` - 🟢 `toolkit clippy` - 🟢 `toolkit test` - 🟢 `toolkit test stdlib` # After Submitting
This commit is contained in:
parent
cd71372ea9
commit
e1cfc96ee8
2 changed files with 40 additions and 52 deletions
|
@ -17,10 +17,18 @@ use super::{create_command, gc::PluginGc, make_plugin_interface, PluginInterface
|
|||
pub struct PersistentPlugin {
|
||||
/// Identity (filename, shell, name) of the plugin
|
||||
identity: PluginIdentity,
|
||||
/// Mutable state
|
||||
mutable: Mutex<MutableState>,
|
||||
}
|
||||
|
||||
/// The mutable state for the persistent plugin. This should all be behind one lock to prevent lock
|
||||
/// order problems.
|
||||
#[derive(Debug)]
|
||||
struct MutableState {
|
||||
/// Reference to the plugin if running
|
||||
running: Mutex<Option<RunningPlugin>>,
|
||||
running: Option<RunningPlugin>,
|
||||
/// Garbage collector config
|
||||
gc_config: Mutex<PluginGcConfig>,
|
||||
gc_config: PluginGcConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -38,8 +46,10 @@ impl PersistentPlugin {
|
|||
pub fn new(identity: PluginIdentity, gc_config: PluginGcConfig) -> PersistentPlugin {
|
||||
PersistentPlugin {
|
||||
identity,
|
||||
running: Mutex::new(None),
|
||||
gc_config: Mutex::new(gc_config),
|
||||
mutable: Mutex::new(MutableState {
|
||||
running: None,
|
||||
gc_config,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -56,14 +66,14 @@ impl PersistentPlugin {
|
|||
K: AsRef<OsStr>,
|
||||
V: AsRef<OsStr>,
|
||||
{
|
||||
let mut running = self.running.lock().map_err(|_| ShellError::NushellFailed {
|
||||
let mut mutable = self.mutable.lock().map_err(|_| ShellError::NushellFailed {
|
||||
msg: format!(
|
||||
"plugin `{}` running mutex poisoned, probably panic during spawn",
|
||||
"plugin `{}` mutex poisoned, probably panic during spawn",
|
||||
self.identity.name()
|
||||
),
|
||||
})?;
|
||||
|
||||
if let Some(ref running) = *running {
|
||||
if let Some(ref running) = mutable.running {
|
||||
// It exists, so just clone the interface
|
||||
Ok(running.interface.clone())
|
||||
} else {
|
||||
|
@ -71,9 +81,9 @@ impl PersistentPlugin {
|
|||
//
|
||||
// We hold the lock the whole time to prevent others from trying to spawn and ending
|
||||
// up with duplicate plugins
|
||||
let new_running = self.clone().spawn(envs()?)?;
|
||||
let new_running = self.clone().spawn(envs()?, &mutable.gc_config)?;
|
||||
let interface = new_running.interface.clone();
|
||||
*running = Some(new_running);
|
||||
mutable.running = Some(new_running);
|
||||
Ok(interface)
|
||||
}
|
||||
}
|
||||
|
@ -82,6 +92,7 @@ impl PersistentPlugin {
|
|||
fn spawn(
|
||||
self: Arc<Self>,
|
||||
envs: impl IntoIterator<Item = (impl AsRef<OsStr>, impl AsRef<OsStr>)>,
|
||||
gc_config: &PluginGcConfig,
|
||||
) -> Result<RunningPlugin, ShellError> {
|
||||
let source_file = self.identity.filename();
|
||||
let mut plugin_cmd = create_command(source_file, self.identity.shell());
|
||||
|
@ -111,14 +122,7 @@ impl PersistentPlugin {
|
|||
})?;
|
||||
|
||||
// Start the plugin garbage collector
|
||||
let gc_config =
|
||||
self.gc_config
|
||||
.lock()
|
||||
.map(|c| c.clone())
|
||||
.map_err(|_| ShellError::NushellFailed {
|
||||
msg: "plugin gc mutex poisoned".into(),
|
||||
})?;
|
||||
let gc = PluginGc::new(gc_config, &self)?;
|
||||
let gc = PluginGc::new(gc_config.clone(), &self)?;
|
||||
|
||||
let pid = child.id();
|
||||
let interface =
|
||||
|
@ -136,47 +140,51 @@ impl RegisteredPlugin for PersistentPlugin {
|
|||
fn is_running(&self) -> bool {
|
||||
// If the lock is poisoned, we return false here. That may not be correct, but this is a
|
||||
// failure state anyway that would be noticed at some point
|
||||
self.running.lock().map(|r| r.is_some()).unwrap_or(false)
|
||||
self.mutable
|
||||
.lock()
|
||||
.map(|m| m.running.is_some())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn pid(&self) -> Option<u32> {
|
||||
// Again, we return None for a poisoned lock.
|
||||
self.running
|
||||
self.mutable
|
||||
.lock()
|
||||
.ok()
|
||||
.and_then(|r| r.as_ref().map(|r| r.pid))
|
||||
.and_then(|r| r.running.as_ref().map(|r| r.pid))
|
||||
}
|
||||
|
||||
fn stop(&self) -> Result<(), ShellError> {
|
||||
let mut running = self.running.lock().map_err(|_| ShellError::NushellFailed {
|
||||
let mut mutable = self.mutable.lock().map_err(|_| ShellError::NushellFailed {
|
||||
msg: format!(
|
||||
"plugin `{}` running mutex poisoned, probably panic during spawn",
|
||||
"plugin `{}` mutable mutex poisoned, probably panic during spawn",
|
||||
self.identity.name()
|
||||
),
|
||||
})?;
|
||||
|
||||
// If the plugin is running, stop its GC, so that the GC doesn't accidentally try to stop
|
||||
// a future plugin
|
||||
if let Some(running) = running.as_ref() {
|
||||
if let Some(ref running) = mutable.running {
|
||||
running.gc.stop_tracking();
|
||||
}
|
||||
|
||||
// We don't try to kill the process or anything, we just drop the RunningPlugin. It should
|
||||
// exit soon after
|
||||
*running = None;
|
||||
mutable.running = None;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_gc_config(&self, gc_config: &PluginGcConfig) {
|
||||
if let Ok(mut conf) = self.gc_config.lock() {
|
||||
if let Ok(mut mutable) = self.mutable.lock() {
|
||||
// Save the new config for future calls
|
||||
*conf = gc_config.clone();
|
||||
}
|
||||
if let Ok(running) = self.running.lock() {
|
||||
if let Some(running) = running.as_ref() {
|
||||
// If the plugin is already running, propagate the config change to the running GC
|
||||
running.gc.set_config(gc_config.clone());
|
||||
running.gc.flush();
|
||||
mutable.gc_config = gc_config.clone();
|
||||
|
||||
// If the plugin is already running, propagate the config change to the running GC
|
||||
if let Some(gc) = mutable.running.as_ref().map(|running| running.gc.clone()) {
|
||||
// We don't want to get caught holding the lock
|
||||
drop(mutable);
|
||||
gc.set_config(gc_config.clone());
|
||||
gc.flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -193,10 +193,6 @@ fn custom_values_can_still_be_collapsed_after_stop() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(
|
||||
target_os = "macos",
|
||||
ignore = "Plugin GC tests are disabled temporarily on macOS because they get stuck"
|
||||
)]
|
||||
fn plugin_gc_can_be_configured_to_stop_plugins_immediately() {
|
||||
// I know the test is to stop "immediately", but if we actually check immediately it could
|
||||
// lead to a race condition. So there's a 1ms sleep just to be fair.
|
||||
|
@ -232,10 +228,6 @@ fn plugin_gc_can_be_configured_to_stop_plugins_immediately() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(
|
||||
target_os = "macos",
|
||||
ignore = "Plugin GC tests are disabled temporarily on macOS because they get stuck"
|
||||
)]
|
||||
fn plugin_gc_can_be_configured_to_stop_plugins_after_delay() {
|
||||
let out = nu_with_plugins!(
|
||||
cwd: ".",
|
||||
|
@ -293,10 +285,6 @@ fn plugin_gc_can_be_configured_to_stop_plugins_after_delay() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(
|
||||
target_os = "macos",
|
||||
ignore = "Plugin GC tests are disabled temporarily on macOS because they get stuck"
|
||||
)]
|
||||
fn plugin_gc_can_be_configured_as_disabled() {
|
||||
let out = nu_with_plugins!(
|
||||
cwd: ".",
|
||||
|
@ -329,10 +317,6 @@ fn plugin_gc_can_be_configured_as_disabled() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(
|
||||
target_os = "macos",
|
||||
ignore = "Plugin GC tests are disabled temporarily on macOS because they get stuck"
|
||||
)]
|
||||
fn plugin_gc_can_be_disabled_by_plugin() {
|
||||
let out = nu_with_plugins!(
|
||||
cwd: ".",
|
||||
|
@ -350,10 +334,6 @@ fn plugin_gc_can_be_disabled_by_plugin() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(
|
||||
target_os = "macos",
|
||||
ignore = "Plugin GC tests are disabled temporarily on macOS because they get stuck"
|
||||
)]
|
||||
fn plugin_gc_does_not_stop_plugin_while_stream_output_is_active() {
|
||||
let out = nu_with_plugins!(
|
||||
cwd: ".",
|
||||
|
|
Loading…
Reference in a new issue