From 951c51e7407a4aecfb832ac2317d20704f649c32 Mon Sep 17 00:00:00 2001 From: Joining7943 <111500881+Joining7943@users.noreply.github.com> Date: Tue, 13 Sep 2022 22:54:36 +0200 Subject: [PATCH] tail: large refactoring and cleanup of the tail code base. See also #3905 for details --- src/uu/tail/src/args.rs | 473 +++++++++ src/uu/tail/src/chunks.rs | 14 +- src/uu/tail/src/follow/files.rs | 211 ++++ src/uu/tail/src/follow/mod.rs | 9 + src/uu/tail/src/follow/watch.rs | 595 +++++++++++ src/uu/tail/src/paths.rs | 279 ++++++ src/uu/tail/src/tail.rs | 1659 ++++--------------------------- src/uu/tail/src/text.rs | 20 + tests/by-util/test_tail.rs | 36 + 9 files changed, 1848 insertions(+), 1448 deletions(-) create mode 100644 src/uu/tail/src/args.rs create mode 100644 src/uu/tail/src/follow/files.rs create mode 100644 src/uu/tail/src/follow/mod.rs create mode 100644 src/uu/tail/src/follow/watch.rs create mode 100644 src/uu/tail/src/paths.rs create mode 100644 src/uu/tail/src/text.rs diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs new file mode 100644 index 000000000..dfa7f6035 --- /dev/null +++ b/src/uu/tail/src/args.rs @@ -0,0 +1,473 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore (ToDO) kqueue Signum + +use crate::paths::Input; +use crate::{parse, platform, Quotable}; +use clap::{Arg, ArgMatches, Command, ValueSource}; +use std::collections::VecDeque; +use std::ffi::OsString; +use std::time::Duration; +use uucore::error::{UResult, USimpleError, UUsageError}; +use uucore::format_usage; +use uucore::parse_size::{parse_size, ParseSizeError}; + +const ABOUT: &str = "\ + Print the last 10 lines of each FILE to standard output.\n\ + With more than one FILE, precede each with a header giving the file name.\n\ + With no FILE, or when FILE is -, read standard input.\n\ + \n\ + Mandatory arguments to long flags are mandatory for short flags too.\ + "; +const USAGE: &str = "{} [FLAG]... [FILE]..."; + +pub mod options { + pub mod verbosity { + pub static QUIET: &str = "quiet"; + pub static VERBOSE: &str = "verbose"; + } + pub static BYTES: &str = "bytes"; + pub static FOLLOW: &str = "follow"; + pub static LINES: &str = "lines"; + pub static PID: &str = "pid"; + pub static SLEEP_INT: &str = "sleep-interval"; + pub static ZERO_TERM: &str = "zero-terminated"; + pub static DISABLE_INOTIFY_TERM: &str = "-disable-inotify"; // NOTE: three hyphens is correct + pub static USE_POLLING: &str = "use-polling"; + pub static RETRY: &str = "retry"; + pub static FOLLOW_RETRY: &str = "F"; + pub static MAX_UNCHANGED_STATS: &str = "max-unchanged-stats"; + pub static ARG_FILES: &str = "files"; + pub static PRESUME_INPUT_PIPE: &str = "-presume-input-pipe"; // NOTE: three hyphens is correct +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Signum { + Negative(u64), + Positive(u64), + PlusZero, + MinusZero, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum FilterMode { + Bytes(Signum), + + /// Mode for lines delimited by delimiter as u8 + Lines(Signum, u8), +} + +impl FilterMode { + fn from(matches: &ArgMatches) -> UResult { + let zero_term = matches.contains_id(options::ZERO_TERM); + let mode = if let Some(arg) = matches.value_of(options::BYTES) { + match parse_num(arg) { + Ok(signum) => Self::Bytes(signum), + Err(e) => { + return Err(UUsageError::new( + 1, + format!("invalid number of bytes: {}", e), + )) + } + } + } else if let Some(arg) = matches.value_of(options::LINES) { + match parse_num(arg) { + Ok(signum) => { + let delimiter = if zero_term { 0 } else { b'\n' }; + Self::Lines(signum, delimiter) + } + Err(e) => { + return Err(UUsageError::new( + 1, + format!("invalid number of lines: {}", e), + )) + } + } + } else if zero_term { + Self::default_zero() + } else { + Self::default() + }; + + Ok(mode) + } + + fn default_zero() -> Self { + Self::Lines(Signum::Negative(10), 0) + } +} + +impl Default for FilterMode { + fn default() -> Self { + Self::Lines(Signum::Negative(10), b'\n') + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum FollowMode { + Descriptor, + Name, +} + +#[derive(Debug, Default)] +pub struct Settings { + pub follow: Option, + pub max_unchanged_stats: u32, + pub mode: FilterMode, + pub pid: platform::Pid, + pub retry: bool, + pub sleep_sec: Duration, + pub use_polling: bool, + pub verbose: bool, + pub presume_input_pipe: bool, + pub inputs: VecDeque, +} + +impl Settings { + pub fn from(matches: &clap::ArgMatches) -> UResult { + let mut settings: Self = Self { + sleep_sec: Duration::from_secs_f32(1.0), + max_unchanged_stats: 5, + ..Default::default() + }; + + settings.follow = if matches.contains_id(options::FOLLOW_RETRY) { + Some(FollowMode::Name) + } else if matches.value_source(options::FOLLOW) != Some(ValueSource::CommandLine) { + None + } else if matches.value_of(options::FOLLOW) == Some("name") { + Some(FollowMode::Name) + } else { + Some(FollowMode::Descriptor) + }; + + settings.retry = + matches.contains_id(options::RETRY) || matches.contains_id(options::FOLLOW_RETRY); + + if settings.retry && settings.follow.is_none() { + show_warning!("--retry ignored; --retry is useful only when following"); + } + + if let Some(s) = matches.value_of(options::SLEEP_INT) { + settings.sleep_sec = match s.parse::() { + Ok(s) => Duration::from_secs_f32(s), + Err(_) => { + return Err(UUsageError::new( + 1, + format!("invalid number of seconds: {}", s.quote()), + )) + } + } + } + + settings.use_polling = matches.contains_id(options::USE_POLLING); + + if let Some(s) = matches.value_of(options::MAX_UNCHANGED_STATS) { + settings.max_unchanged_stats = match s.parse::() { + Ok(s) => s, + Err(_) => { + return Err(UUsageError::new( + 1, + format!( + "invalid maximum number of unchanged stats between opens: {}", + s.quote() + ), + )); + } + } + } + + if let Some(pid_str) = matches.value_of(options::PID) { + match pid_str.parse() { + Ok(pid) => { + // NOTE: on unix platform::Pid is i32, on windows platform::Pid is u32 + #[cfg(unix)] + if pid < 0 { + // NOTE: tail only accepts an unsigned pid + return Err(USimpleError::new( + 1, + format!("invalid PID: {}", pid_str.quote()), + )); + } + settings.pid = pid; + if settings.follow.is_none() { + show_warning!("PID ignored; --pid=PID is useful only when following"); + } + if !platform::supports_pid_checks(settings.pid) { + show_warning!("--pid=PID is not supported on this system"); + settings.pid = 0; + } + } + Err(e) => { + return Err(USimpleError::new( + 1, + format!("invalid PID: {}: {}", pid_str.quote(), e), + )); + } + } + } + + settings.mode = FilterMode::from(matches)?; + + // Mimic GNU's tail for -[nc]0 without -f and exit immediately + if settings.follow.is_none() + && matches!( + settings.mode, + FilterMode::Lines(Signum::MinusZero, _) | FilterMode::Bytes(Signum::MinusZero) + ) + { + std::process::exit(0) + } + + let mut inputs: VecDeque = matches + .get_many::(options::ARG_FILES) + .map(|v| v.map(|string| Input::from(string.clone())).collect()) + .unwrap_or_default(); + + // apply default and add '-' to inputs if none is present + if inputs.is_empty() { + inputs.push_front(Input::default()); + } + + settings.verbose = (matches.contains_id(options::verbosity::VERBOSE) || inputs.len() > 1) + && !matches.contains_id(options::verbosity::QUIET); + + settings.inputs = inputs; + + settings.presume_input_pipe = matches.contains_id(options::PRESUME_INPUT_PIPE); + + Ok(settings) + } +} + +pub fn arg_iterate<'a>( + mut args: impl uucore::Args + 'a, +) -> UResult + 'a>> { + // argv[0] is always present + let first = args.next().unwrap(); + if let Some(second) = args.next() { + if let Some(s) = second.to_str() { + match parse::parse_obsolete(s) { + Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))), + Some(Err(e)) => Err(UUsageError::new( + 1, + match e { + parse::ParseError::Syntax => format!("bad argument format: {}", s.quote()), + parse::ParseError::Overflow => format!( + "invalid argument: {} Value too large for defined datatype", + s.quote() + ), + }, + )), + None => Ok(Box::new(vec![first, second].into_iter().chain(args))), + } + } else { + Err(UUsageError::new(1, "bad argument encoding".to_owned())) + } + } else { + Ok(Box::new(vec![first].into_iter())) + } +} + +fn parse_num(src: &str) -> Result { + let mut size_string = src.trim(); + let mut starting_with = false; + + if let Some(c) = size_string.chars().next() { + if c == '+' || c == '-' { + // tail: '-' is not documented (8.32 man pages) + size_string = &size_string[1..]; + if c == '+' { + starting_with = true; + } + } + } else { + return Err(ParseSizeError::ParseFailure(src.to_string())); + } + + parse_size(size_string).map(|n| match (n, starting_with) { + (0, true) => Signum::PlusZero, + (0, false) => Signum::MinusZero, + (n, true) => Signum::Positive(n), + (n, false) => Signum::Negative(n), + }) +} + +pub fn stdin_is_pipe_or_fifo() -> bool { + #[cfg(unix)] + { + platform::stdin_is_pipe_or_fifo() + } + #[cfg(windows)] + { + winapi_util::file::typ(winapi_util::HandleRef::stdin()) + .map(|t| t.is_disk() || t.is_pipe()) + .unwrap_or(false) + } +} + +pub fn parse_args(args: impl uucore::Args) -> UResult { + let matches = uu_app().try_get_matches_from(arg_iterate(args)?)?; + Settings::from(&matches) +} + +pub fn uu_app<'a>() -> Command<'a> { + #[cfg(target_os = "linux")] + pub static POLLING_HELP: &str = "Disable 'inotify' support and use polling instead"; + #[cfg(all(unix, not(target_os = "linux")))] + pub static POLLING_HELP: &str = "Disable 'kqueue' support and use polling instead"; + #[cfg(target_os = "windows")] + pub static POLLING_HELP: &str = + "Disable 'ReadDirectoryChanges' support and use polling instead"; + + Command::new(uucore::util_name()) + .version(crate_version!()) + .about(ABOUT) + .override_usage(format_usage(USAGE)) + .infer_long_args(true) + .arg( + Arg::new(options::BYTES) + .short('c') + .long(options::BYTES) + .takes_value(true) + .allow_hyphen_values(true) + .overrides_with_all(&[options::BYTES, options::LINES]) + .help("Number of bytes to print"), + ) + .arg( + Arg::new(options::FOLLOW) + .short('f') + .long(options::FOLLOW) + .default_value("descriptor") + .takes_value(true) + .min_values(0) + .max_values(1) + .require_equals(true) + .value_parser(["descriptor", "name"]) + .help("Print the file as it grows"), + ) + .arg( + Arg::new(options::LINES) + .short('n') + .long(options::LINES) + .takes_value(true) + .allow_hyphen_values(true) + .overrides_with_all(&[options::BYTES, options::LINES]) + .help("Number of lines to print"), + ) + .arg( + Arg::new(options::PID) + .long(options::PID) + .takes_value(true) + .help("With -f, terminate after process ID, PID dies"), + ) + .arg( + Arg::new(options::verbosity::QUIET) + .short('q') + .long(options::verbosity::QUIET) + .visible_alias("silent") + .overrides_with_all(&[options::verbosity::QUIET, options::verbosity::VERBOSE]) + .help("Never output headers giving file names"), + ) + .arg( + Arg::new(options::SLEEP_INT) + .short('s') + .takes_value(true) + .long(options::SLEEP_INT) + .help("Number of seconds to sleep between polling the file when running with -f"), + ) + .arg( + Arg::new(options::MAX_UNCHANGED_STATS) + .takes_value(true) + .long(options::MAX_UNCHANGED_STATS) + .help( + "Reopen a FILE which has not changed size after N (default 5) iterations \ + to see if it has been unlinked or renamed (this is the usual case of rotated \ + log files); This option is meaningful only when polling \ + (i.e., with --use-polling) and when --follow=name", + ), + ) + .arg( + Arg::new(options::verbosity::VERBOSE) + .short('v') + .long(options::verbosity::VERBOSE) + .overrides_with_all(&[options::verbosity::QUIET, options::verbosity::VERBOSE]) + .help("Always output headers giving file names"), + ) + .arg( + Arg::new(options::ZERO_TERM) + .short('z') + .long(options::ZERO_TERM) + .help("Line delimiter is NUL, not newline"), + ) + .arg( + Arg::new(options::USE_POLLING) + .alias(options::DISABLE_INOTIFY_TERM) // NOTE: Used by GNU's test suite + .alias("dis") // NOTE: Used by GNU's test suite + .long(options::USE_POLLING) + .help(POLLING_HELP), + ) + .arg( + Arg::new(options::RETRY) + .long(options::RETRY) + .help("Keep trying to open a file if it is inaccessible"), + ) + .arg( + Arg::new(options::FOLLOW_RETRY) + .short('F') + .help("Same as --follow=name --retry") + .overrides_with_all(&[options::RETRY, options::FOLLOW]), + ) + .arg( + Arg::new(options::PRESUME_INPUT_PIPE) + .long(options::PRESUME_INPUT_PIPE) + .alias(options::PRESUME_INPUT_PIPE) + .hide(true), + ) + + .arg( + Arg::new(options::ARG_FILES) + .multiple_occurrences(true) + .takes_value(true) + .min_values(1) + .value_hint(clap::ValueHint::FilePath), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_num_when_sign_is_given() { + let result = parse_num("+0"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Signum::PlusZero); + + let result = parse_num("+1"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Signum::Positive(1)); + + let result = parse_num("-0"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Signum::MinusZero); + + let result = parse_num("-1"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Signum::Negative(1)); + } + + #[test] + fn test_parse_num_when_no_sign_is_given() { + let result = parse_num("0"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Signum::MinusZero); + + let result = parse_num("1"); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), Signum::Negative(1)); + } +} diff --git a/src/uu/tail/src/chunks.rs b/src/uu/tail/src/chunks.rs index 8fb53c769..acfc69a30 100644 --- a/src/uu/tail/src/chunks.rs +++ b/src/uu/tail/src/chunks.rs @@ -10,7 +10,7 @@ // spell-checker:ignore (ToDO) filehandle BUFSIZ use std::collections::VecDeque; use std::fs::File; -use std::io::{BufReader, Read, Seek, SeekFrom, Write}; +use std::io::{BufRead, Read, Seek, SeekFrom, Write}; use uucore::error::UResult; /// When reading files in reverse in `bounded_tail`, this is the size of each @@ -208,7 +208,7 @@ impl BytesChunk { /// that number of bytes. If EOF is reached (so 0 bytes are read), then returns /// [`UResult`] or else the result with [`Some(bytes)`] where bytes is the number of bytes /// read from the source. - pub fn fill(&mut self, filehandle: &mut BufReader) -> UResult> { + pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult> { let num_bytes = filehandle.read(&mut self.buffer)?; self.bytes = num_bytes; if num_bytes == 0 { @@ -283,7 +283,7 @@ impl BytesChunkBuffer { /// let mut chunks = BytesChunkBuffer::new(num_print); /// chunks.fill(&mut reader).unwrap(); /// ``` - pub fn fill(&mut self, reader: &mut BufReader) -> UResult<()> { + pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> { let mut chunk = Box::new(BytesChunk::new()); // fill chunks with all bytes from reader and reuse already instantiated chunks if possible @@ -323,6 +323,10 @@ impl BytesChunkBuffer { } Ok(()) } + + pub fn has_data(&self) -> bool { + !self.chunks.is_empty() + } } /// Works similar to a [`BytesChunk`] but also stores the number of lines encountered in the current @@ -452,7 +456,7 @@ impl LinesChunk { /// that number of bytes. This function works like the [`BytesChunk::fill`] function besides /// that this function also counts and stores the number of lines encountered while reading from /// the `filehandle`. - pub fn fill(&mut self, filehandle: &mut BufReader) -> UResult> { + pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult> { match self.chunk.fill(filehandle)? { None => { self.lines = 0; @@ -556,7 +560,7 @@ impl LinesChunkBuffer { /// in sum exactly `self.num_print` lines stored in all chunks. The method returns an iterator /// over these chunks. If there are no chunks, for example because the piped stdin contained no /// lines, or `num_print = 0` then `iterator.next` will return None. - pub fn fill(&mut self, reader: &mut BufReader) -> UResult<()> { + pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> { let mut chunk = Box::new(LinesChunk::new(self.delimiter)); while (chunk.fill(reader)?).is_some() { diff --git a/src/uu/tail/src/follow/files.rs b/src/uu/tail/src/follow/files.rs new file mode 100644 index 000000000..1be090217 --- /dev/null +++ b/src/uu/tail/src/follow/files.rs @@ -0,0 +1,211 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore tailable seekable stdlib (stdlib) + +use crate::args::Settings; +use crate::chunks::BytesChunkBuffer; +use crate::paths::{HeaderPrinter, PathExtTail}; +use crate::text; +use std::collections::hash_map::Keys; +use std::collections::HashMap; +use std::fs::{File, Metadata}; +use std::io::{stdout, BufRead, BufReader, BufWriter}; + +use std::path::{Path, PathBuf}; +use uucore::error::UResult; + +/// Data structure to keep a handle on files to follow. +/// `last` always holds the path/key of the last file that was printed from. +/// The keys of the HashMap can point to an existing file path (normal case), +/// or stdin ("-"), or to a non existing path (--retry). +/// For existing files, all keys in the HashMap are absolute Paths. +pub struct FileHandling { + map: HashMap, + last: Option, + header_printer: HeaderPrinter, +} + +impl FileHandling { + pub fn from(settings: &Settings) -> Self { + Self { + map: HashMap::with_capacity(settings.inputs.len()), + last: None, + header_printer: HeaderPrinter::new(settings.verbose, false), + } + } + + /// Wrapper for HashMap::insert using Path::canonicalize + pub fn insert(&mut self, k: &Path, v: PathData, update_last: bool) { + let k = Self::canonicalize_path(k); + if update_last { + self.last = Some(k.to_owned()); + } + let _ = self.map.insert(k, v); + } + + /// Wrapper for HashMap::remove using Path::canonicalize + pub fn remove(&mut self, k: &Path) -> PathData { + self.map.remove(&Self::canonicalize_path(k)).unwrap() + } + + /// Wrapper for HashMap::get using Path::canonicalize + pub fn get(&self, k: &Path) -> &PathData { + self.map.get(&Self::canonicalize_path(k)).unwrap() + } + + /// Wrapper for HashMap::get_mut using Path::canonicalize + pub fn get_mut(&mut self, k: &Path) -> &mut PathData { + self.map.get_mut(&Self::canonicalize_path(k)).unwrap() + } + + /// Canonicalize `path` if it is not already an absolute path + fn canonicalize_path(path: &Path) -> PathBuf { + if path.is_relative() && !path.is_stdin() { + if let Ok(p) = path.canonicalize() { + return p; + } + } + path.to_owned() + } + + pub fn get_mut_metadata(&mut self, path: &Path) -> Option<&Metadata> { + self.get_mut(path).metadata.as_ref() + } + + pub fn keys(&self) -> Keys { + self.map.keys() + } + + pub fn contains_key(&self, k: &Path) -> bool { + self.map.contains_key(k) + } + + pub fn get_last(&self) -> Option<&PathBuf> { + self.last.as_ref() + } + + /// Return true if there is only stdin remaining + pub fn only_stdin_remaining(&self) -> bool { + self.map.len() == 1 && (self.map.contains_key(Path::new(text::DASH))) + } + + /// Return true if there is at least one "tailable" path (or stdin) remaining + pub fn files_remaining(&self) -> bool { + for path in self.map.keys() { + if path.is_tailable() || path.is_stdin() { + return true; + } + } + false + } + + /// Returns true if there are no files remaining + pub fn no_files_remaining(&self, settings: &Settings) -> bool { + self.map.is_empty() || !self.files_remaining() && !settings.retry + } + + /// Set `reader` to None to indicate that `path` is not an existing file anymore. + pub fn reset_reader(&mut self, path: &Path) { + self.get_mut(path).reader = None; + } + + /// Reopen the file at the monitored `path` + pub fn update_reader(&mut self, path: &Path) -> UResult<()> { + /* + BUG: If it's not necessary to reopen a file, GNU's tail calls seek to offset 0. + However we can't call seek here because `BufRead` does not implement `Seek`. + As a workaround we always reopen the file even though this might not always + be necessary. + */ + self.get_mut(path) + .reader + .replace(Box::new(BufReader::new(File::open(&path)?))); + Ok(()) + } + + /// Reload metadata from `path`, or `metadata` + pub fn update_metadata(&mut self, path: &Path, metadata: Option) { + self.get_mut(path).metadata = if metadata.is_some() { + metadata + } else { + path.metadata().ok() + }; + } + + /// Read new data from `path` and print it to stdout + pub fn tail_file(&mut self, path: &Path, verbose: bool) -> UResult { + let mut chunks = BytesChunkBuffer::new(u64::MAX); + if let Some(reader) = self.get_mut(path).reader.as_mut() { + chunks.fill(reader)?; + } + if chunks.has_data() { + if self.needs_header(path, verbose) { + let display_name = self.get(path).display_name.clone(); + self.header_printer.print(display_name.as_str()); + } + + let stdout = stdout(); + let writer = BufWriter::new(stdout.lock()); + chunks.print(writer)?; + + self.last.replace(path.to_owned()); + self.update_metadata(path, None); + Ok(true) + } else { + Ok(false) + } + } + + /// Decide if printing `path` needs a header based on when it was last printed + pub fn needs_header(&self, path: &Path, verbose: bool) -> bool { + if verbose { + if let Some(ref last) = self.last { + return !last.eq(&path); + } else { + return true; + } + } + false + } +} + +/// Data structure to keep a handle on the BufReader, Metadata +/// and the display_name (header_name) of files that are being followed. +pub struct PathData { + pub reader: Option>, + pub metadata: Option, + pub display_name: String, +} + +impl PathData { + pub fn new( + reader: Option>, + metadata: Option, + display_name: &str, + ) -> Self { + Self { + reader, + metadata, + display_name: display_name.to_owned(), + } + } + pub fn from_other_with_path(data: Self, path: &Path) -> Self { + // Remove old reader + let old_reader = data.reader; + let reader = if old_reader.is_some() { + // Use old reader with the same file descriptor if there is one + old_reader + } else if let Ok(file) = File::open(path) { + // Open new file tail from start + Some(Box::new(BufReader::new(file)) as Box) + } else { + // Probably file was renamed/moved or removed again + None + }; + + Self::new(reader, path.metadata().ok(), data.display_name.as_str()) + } +} diff --git a/src/uu/tail/src/follow/mod.rs b/src/uu/tail/src/follow/mod.rs new file mode 100644 index 000000000..4bb2798d1 --- /dev/null +++ b/src/uu/tail/src/follow/mod.rs @@ -0,0 +1,9 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +mod files; +mod watch; + +pub use watch::{follow, WatcherService}; diff --git a/src/uu/tail/src/follow/watch.rs b/src/uu/tail/src/follow/watch.rs new file mode 100644 index 000000000..b00d85f44 --- /dev/null +++ b/src/uu/tail/src/follow/watch.rs @@ -0,0 +1,595 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore (ToDO) tailable untailable stdlib kqueue Uncategorized unwatch + +use crate::args::{FollowMode, Settings}; +use crate::follow::files::{FileHandling, PathData}; +use crate::paths::{Input, InputKind, MetadataExtTail, PathExtTail}; +use crate::{platform, text}; +use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind}; +use std::collections::VecDeque; +use std::io::BufRead; +use std::path::{Path, PathBuf}; +use std::sync::mpsc; +use std::sync::mpsc::{channel, Receiver}; +use uucore::display::Quotable; +use uucore::error::{set_exit_code, UResult, USimpleError}; + +pub struct WatcherRx { + watcher: Box, + receiver: Receiver>, +} + +impl WatcherRx { + fn new( + watcher: Box, + receiver: Receiver>, + ) -> Self { + Self { watcher, receiver } + } + + /// Wrapper for `notify::Watcher::watch` to also add the parent directory of `path` if necessary. + fn watch_with_parent(&mut self, path: &Path) -> UResult<()> { + let mut path = path.to_owned(); + #[cfg(target_os = "linux")] + if path.is_file() { + /* + NOTE: Using the parent directory instead of the file is a workaround. + This workaround follows the recommendation of the notify crate authors: + > On some platforms, if the `path` is renamed or removed while being watched, behavior may + > be unexpected. See discussions in [#165] and [#166]. If less surprising behavior is wanted + > one may non-recursively watch the _parent_ directory as well and manage related events. + NOTE: Adding both: file and parent results in duplicate/wrong events. + Tested for notify::InotifyWatcher and for notify::PollWatcher. + */ + if let Some(parent) = path.parent() { + if parent.is_dir() { + path = parent.to_owned(); + } else { + path = PathBuf::from("."); + } + } else { + return Err(USimpleError::new( + 1, + format!("cannot watch parent directory of {}", path.display()), + )); + }; + } + if path.is_relative() { + path = path.canonicalize()?; + } + + // for syscalls: 2x "inotify_add_watch" ("filename" and ".") and 1x "inotify_rm_watch" + self.watch(&path, RecursiveMode::NonRecursive)?; + Ok(()) + } + + fn watch(&mut self, path: &Path, mode: RecursiveMode) -> UResult<()> { + self.watcher + .watch(path, mode) + .map_err(|err| USimpleError::new(1, err.to_string())) + } + + fn unwatch(&mut self, path: &Path) -> UResult<()> { + self.watcher + .unwatch(path) + .map_err(|err| USimpleError::new(1, err.to_string())) + } +} + +pub struct WatcherService { + /// Whether --retry was given on the command line + pub retry: bool, + + /// The [`FollowMode`] + pub follow: Option, + + /// Indicates whether to use the fallback `polling` method instead of the + /// platform specific event driven method. Since `use_polling` is subject to + /// change during runtime it is moved out of [`Settings`]. + pub use_polling: bool, + pub watcher_rx: Option, + pub orphans: Vec, + pub files: FileHandling, +} + +impl WatcherService { + pub fn new( + retry: bool, + follow: Option, + use_polling: bool, + files: FileHandling, + ) -> Self { + Self { + retry, + follow, + use_polling, + watcher_rx: None, + orphans: Vec::new(), + files, + } + } + + pub fn from(settings: &Settings) -> Self { + Self::new( + settings.retry, + settings.follow, + settings.use_polling, + FileHandling::from(settings), + ) + } + + pub fn add_path( + &mut self, + path: &Path, + display_name: &str, + reader: Option>, + update_last: bool, + ) -> UResult<()> { + if self.follow.is_some() { + let path = if path.is_relative() { + std::env::current_dir()?.join(path) + } else { + path.to_owned() + }; + let metadata = path.metadata().ok(); + self.files.insert( + &path, + PathData::new(reader, metadata, display_name), + update_last, + ); + } + + Ok(()) + } + + pub fn add_stdin( + &mut self, + display_name: &str, + reader: Option>, + update_last: bool, + ) -> UResult<()> { + if self.follow == Some(FollowMode::Descriptor) { + return self.add_path( + &PathBuf::from(text::DEV_STDIN), + display_name, + reader, + update_last, + ); + } + + Ok(()) + } + + pub fn add_bad_path( + &mut self, + path: &Path, + display_name: &str, + update_last: bool, + ) -> UResult<()> { + if self.retry && self.follow.is_some() { + return self.add_path(path, display_name, None, update_last); + } + + Ok(()) + } + + pub fn start(&mut self, settings: &Settings) -> UResult<()> { + if settings.follow.is_none() { + return Ok(()); + } + + let (tx, rx) = channel(); + + /* + Watcher is implemented per platform using the best implementation available on that + platform. In addition to such event driven implementations, a polling implementation + is also provided that should work on any platform. + Linux / Android: inotify + macOS: FSEvents / kqueue + Windows: ReadDirectoryChangesWatcher + FreeBSD / NetBSD / OpenBSD / DragonflyBSD: kqueue + Fallback: polling every n seconds + + NOTE: + We force the use of kqueue with: features=["macos_kqueue"]. + On macOS only `kqueue` is suitable for our use case because `FSEvents` + waits for file close util it delivers a modify event. See: + https://github.com/notify-rs/notify/issues/240 + */ + + let watcher: Box; + let watcher_config = notify::Config::default() + .with_poll_interval(settings.sleep_sec) + /* + NOTE: By enabling compare_contents, performance will be significantly impacted + as all files will need to be read and hashed at each `poll_interval`. + However, this is necessary to pass: "gnu/tests/tail-2/F-vs-rename.sh" + */ + .with_compare_contents(true); + if self.use_polling || RecommendedWatcher::kind() == WatcherKind::PollWatcher { + self.use_polling = true; // We have to use polling because there's no supported backend + watcher = Box::new(notify::PollWatcher::new(tx, watcher_config).unwrap()); + } else { + let tx_clone = tx.clone(); + match notify::RecommendedWatcher::new(tx, notify::Config::default()) { + Ok(w) => watcher = Box::new(w), + Err(e) if e.to_string().starts_with("Too many open files") => { + /* + NOTE: This ErrorKind is `Uncategorized`, but it is not recommended + to match an error against `Uncategorized` + NOTE: Could be tested with decreasing `max_user_instances`, e.g.: + `sudo sysctl fs.inotify.max_user_instances=64` + */ + show_error!( + "{} cannot be used, reverting to polling: Too many open files", + text::BACKEND + ); + set_exit_code(1); + self.use_polling = true; + watcher = Box::new(notify::PollWatcher::new(tx_clone, watcher_config).unwrap()); + } + Err(e) => return Err(USimpleError::new(1, e.to_string())), + }; + } + + self.watcher_rx = Some(WatcherRx::new(watcher, rx)); + self.init_files(&settings.inputs)?; + + Ok(()) + } + + pub fn follow_descriptor(&self) -> bool { + self.follow == Some(FollowMode::Descriptor) + } + + pub fn follow_name(&self) -> bool { + self.follow == Some(FollowMode::Name) + } + + pub fn follow_descriptor_retry(&self) -> bool { + self.follow_descriptor() && self.retry + } + + pub fn follow_name_retry(&self) -> bool { + self.follow_name() && self.retry + } + + fn init_files(&mut self, inputs: &VecDeque) -> UResult<()> { + if let Some(watcher_rx) = &mut self.watcher_rx { + for input in inputs { + match input.kind() { + InputKind::Stdin => continue, + InputKind::File(path) => { + #[cfg(all(unix, not(target_os = "linux")))] + if !path.is_file() { + continue; + } + let mut path = path.to_owned(); + if path.is_relative() { + path = std::env::current_dir()?.join(path); + } + + if path.is_tailable() { + // Add existing regular files to `Watcher` (InotifyWatcher). + watcher_rx.watch_with_parent(&path)?; + } else if !path.is_orphan() { + // If `path` is not a tailable file, add its parent to `Watcher`. + watcher_rx + .watch(path.parent().unwrap(), RecursiveMode::NonRecursive)?; + } else { + // If there is no parent, add `path` to `orphans`. + self.orphans.push(path); + } + } + } + } + } + Ok(()) + } + + fn handle_event( + &mut self, + event: ¬ify::Event, + settings: &Settings, + ) -> UResult> { + use notify::event::*; + + let event_path = event.paths.first().unwrap(); + let mut paths: Vec = vec![]; + let display_name = self.files.get(event_path).display_name.clone(); + + match event.kind { + EventKind::Modify(ModifyKind::Metadata(MetadataKind::Any | MetadataKind::WriteTime)) + + // | EventKind::Access(AccessKind::Close(AccessMode::Write)) + | EventKind::Create(CreateKind::File | CreateKind::Folder | CreateKind::Any) + | EventKind::Modify(ModifyKind::Data(DataChange::Any)) + | EventKind::Modify(ModifyKind::Name(RenameMode::To)) => { + if let Ok(new_md) = event_path.metadata() { + + let is_tailable = new_md.is_tailable(); + let pd = self.files.get(event_path); + if let Some(old_md) = &pd.metadata { + if is_tailable { + // We resume tracking from the start of the file, + // assuming it has been truncated to 0. This mimics GNU's `tail` + // behavior and is the usual truncation operation for log self.files. + if !old_md.is_tailable() { + show_error!( "{} has become accessible", display_name.quote()); + self.files.update_reader(event_path)?; + } else if pd.reader.is_none() { + show_error!( "{} has appeared; following new file", display_name.quote()); + self.files.update_reader(event_path)?; + } else if event.kind == EventKind::Modify(ModifyKind::Name(RenameMode::To)) + || (self.use_polling + && !old_md.file_id_eq(&new_md)) { + show_error!( "{} has been replaced; following new file", display_name.quote()); + self.files.update_reader(event_path)?; + } else if old_md.got_truncated(&new_md)? { + show_error!("{}: file truncated", display_name); + self.files.update_reader(event_path)?; + } + paths.push(event_path.to_owned()); + } else if !is_tailable && old_md.is_tailable() { + if pd.reader.is_some() { + self.files.reset_reader(event_path); + } else { + show_error!( + "{} has been replaced with an untailable file", + display_name.quote() + ); + } + } + } else if is_tailable { + show_error!( "{} has appeared; following new file", display_name.quote()); + self.files.update_reader(event_path)?; + paths.push(event_path.to_owned()); + } else if settings.retry { + if self.follow_descriptor() { + show_error!( + "{} has been replaced with an untailable file; giving up on this name", + display_name.quote() + ); + let _ = self.watcher_rx.as_mut().unwrap().watcher.unwatch(event_path); + self.files.remove(event_path); + if self.files.no_files_remaining(settings) { + return Err(USimpleError::new(1, text::NO_FILES_REMAINING)); + } + } else { + show_error!( + "{} has been replaced with an untailable file", + display_name.quote() + ); + } + } + self.files.update_metadata(event_path, Some(new_md)); + } + } + EventKind::Remove(RemoveKind::File | RemoveKind::Any) + + // | EventKind::Modify(ModifyKind::Name(RenameMode::Any)) + | EventKind::Modify(ModifyKind::Name(RenameMode::From)) => { + if self.follow_name() { + if settings.retry { + if let Some(old_md) = self.files.get_mut_metadata(event_path) { + if old_md.is_tailable() && self.files.get(event_path).reader.is_some() { + show_error!( + "{} {}: {}", + display_name.quote(), + text::BECOME_INACCESSIBLE, + text::NO_SUCH_FILE + ); + } + } + if event_path.is_orphan() && !self.orphans.contains(event_path) { + show_error!("directory containing watched file was removed"); + show_error!( + "{} cannot be used, reverting to polling", + text::BACKEND + ); + self.orphans.push(event_path.to_owned()); + let _ = self.watcher_rx.as_mut().unwrap().unwatch(event_path); + } + } else { + show_error!("{}: {}", display_name, text::NO_SUCH_FILE); + if !self.files.files_remaining() && self.use_polling { + // NOTE: GNU's tail exits here for `---disable-inotify` + return Err(USimpleError::new(1, text::NO_FILES_REMAINING)); + } + } + self.files.reset_reader(event_path); + } else if self.follow_descriptor_retry() { + // --retry only effective for the initial open + let _ = self.watcher_rx.as_mut().unwrap().unwatch(event_path); + self.files.remove(event_path); + } else if self.use_polling && event.kind == EventKind::Remove(RemoveKind::Any) { + /* + BUG: The watched file was removed. Since we're using Polling, this + could be a rename. We can't tell because `notify::PollWatcher` doesn't + recognize renames properly. + Ideally we want to call seek to offset 0 on the file handle. + But because we only have access to `PathData::reader` as `BufRead`, + we cannot seek to 0 with `BufReader::seek_relative`. + Also because we don't have the new name, we cannot work around this + by simply reopening the file. + */ + } + } + EventKind::Modify(ModifyKind::Name(RenameMode::Both)) => { + /* + NOTE: For `tail -f a`, keep tracking additions to b after `mv a b` + (gnu/tests/tail-2/descriptor-vs-rename.sh) + NOTE: The File/BufReader doesn't need to be updated. + However, we need to update our `files.map`. + This can only be done for inotify, because this EventKind does not + trigger for the PollWatcher. + BUG: As a result, there's a bug if polling is used: + $ tail -f file_a ---disable-inotify + $ mv file_a file_b + $ echo A >> file_b + $ echo A >> file_a + The last append to file_a is printed, however this shouldn't be because + after the "mv" tail should only follow "file_b". + TODO: [2022-05; jhscheer] add test for this bug + */ + + if self.follow_descriptor() { + let new_path = event.paths.last().unwrap(); + paths.push(new_path.to_owned()); + + let new_data = PathData::from_other_with_path(self.files.remove(event_path), new_path); + self.files.insert( + new_path, + new_data, + self.files.get_last().unwrap() == event_path + ); + + // Unwatch old path and watch new path + let _ = self.watcher_rx.as_mut().unwrap().unwatch(event_path); + self.watcher_rx.as_mut().unwrap().watch_with_parent(new_path)?; + } + } + _ => {} + } + Ok(paths) + } +} + +pub fn follow(mut watcher_service: WatcherService, settings: &Settings) -> UResult<()> { + if watcher_service.files.no_files_remaining(settings) + && !watcher_service.files.only_stdin_remaining() + { + return Err(USimpleError::new(1, text::NO_FILES_REMAINING.to_string())); + } + + let mut process = platform::ProcessChecker::new(settings.pid); + + let mut _event_counter = 0; + let mut _timeout_counter = 0; + + // main follow loop + loop { + let mut _read_some = false; + + // If `--pid=p`, tail checks whether process p + // is alive at least every `--sleep-interval=N` seconds + if settings.follow.is_some() && settings.pid != 0 && process.is_dead() { + // p is dead, tail will also terminate + break; + } + + // For `-F` we need to poll if an orphan path becomes available during runtime. + // If a path becomes an orphan during runtime, it will be added to orphans. + // To be able to differentiate between the cases of test_retry8 and test_retry9, + // here paths will not be removed from orphans if the path becomes available. + if watcher_service.follow_name_retry() { + for new_path in &watcher_service.orphans { + if new_path.exists() { + let pd = watcher_service.files.get(new_path); + let md = new_path.metadata().unwrap(); + if md.is_tailable() && pd.reader.is_none() { + show_error!( + "{} has appeared; following new file", + pd.display_name.quote() + ); + watcher_service.files.update_metadata(new_path, Some(md)); + watcher_service.files.update_reader(new_path)?; + _read_some = watcher_service + .files + .tail_file(new_path, settings.verbose)?; + watcher_service + .watcher_rx + .as_mut() + .unwrap() + .watch_with_parent(new_path)?; + } + } + } + } + + // With -f, sleep for approximately N seconds (default 1.0) between iterations; + // We wake up if Notify sends an Event or if we wait more than `sleep_sec`. + let rx_result = watcher_service + .watcher_rx + .as_mut() + .unwrap() + .receiver + .recv_timeout(settings.sleep_sec); + if rx_result.is_ok() { + _event_counter += 1; + _timeout_counter = 0; + } + + let mut paths = vec![]; // Paths worth checking for new content to print + match rx_result { + Ok(Ok(event)) => { + if let Some(event_path) = event.paths.first() { + if watcher_service.files.contains_key(event_path) { + // Handle Event if it is about a path that we are monitoring + paths = watcher_service.handle_event(&event, settings)?; + } + } + } + Ok(Err(notify::Error { + kind: notify::ErrorKind::Io(ref e), + paths, + })) if e.kind() == std::io::ErrorKind::NotFound => { + if let Some(event_path) = paths.first() { + if watcher_service.files.contains_key(event_path) { + let _ = watcher_service + .watcher_rx + .as_mut() + .unwrap() + .watcher + .unwatch(event_path); + } + } + } + Ok(Err(notify::Error { + kind: notify::ErrorKind::MaxFilesWatch, + .. + })) => { + return Err(USimpleError::new( + 1, + format!("{} resources exhausted", text::BACKEND), + )) + } + Ok(Err(e)) => return Err(USimpleError::new(1, format!("NotifyError: {}", e))), + Err(mpsc::RecvTimeoutError::Timeout) => { + _timeout_counter += 1; + } + Err(e) => return Err(USimpleError::new(1, format!("RecvTimeoutError: {}", e))), + } + + if watcher_service.use_polling && settings.follow.is_some() { + // Consider all files to potentially have new content. + // This is a workaround because `Notify::PollWatcher` + // does not recognize the "renaming" of files. + paths = watcher_service.files.keys().cloned().collect::>(); + } + + // main print loop + for path in &paths { + _read_some = watcher_service.files.tail_file(path, settings.verbose)?; + } + + if _timeout_counter == settings.max_unchanged_stats { + /* + TODO: [2021-10; jhscheer] implement timeout_counter for each file. + ā€˜--max-unchanged-stats=nā€™ + When tailing a file by name, if there have been n (default n=5) consecutive iterations + for which the file has not changed, then open/fstat the file to determine if that file + name is still associated with the same device/inode-number pair as before. When + following a log file that is rotated, this is approximately the number of seconds + between when tail prints the last pre-rotation lines and when it prints the lines that + have accumulated in the new log file. This option is meaningful only when polling + (i.e., without inotify) and when following by name. + */ + } + } + Ok(()) +} diff --git a/src/uu/tail/src/paths.rs b/src/uu/tail/src/paths.rs new file mode 100644 index 000000000..0ebb265b2 --- /dev/null +++ b/src/uu/tail/src/paths.rs @@ -0,0 +1,279 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore tailable seekable stdlib (stdlib) + +#[cfg(unix)] +use std::os::unix::fs::{FileTypeExt, MetadataExt}; + +use std::collections::VecDeque; +use std::fs::{File, Metadata}; +use std::io::{Seek, SeekFrom}; +use std::path::{Path, PathBuf}; + +use uucore::error::UResult; + +use crate::args::Settings; +use crate::text; + +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +#[derive(Debug, Clone)] +pub enum InputKind { + File(PathBuf), + Stdin, +} + +#[derive(Debug, Clone)] +pub struct Input { + kind: InputKind, + pub display_name: String, +} + +impl Input { + pub fn from(string: String) -> Self { + let kind = if string == text::DASH { + InputKind::Stdin + } else { + InputKind::File(PathBuf::from(&string)) + }; + + let display_name = match kind { + InputKind::File(_) => string, + InputKind::Stdin => { + if cfg!(unix) { + text::STDIN_HEADER.to_string() + } else { + string + } + } + }; + + Self { kind, display_name } + } + + pub fn kind(&self) -> &InputKind { + &self.kind + } + + pub fn is_stdin(&self) -> bool { + match self.kind { + InputKind::File(_) => false, + InputKind::Stdin => true, + } + } + + pub fn resolve(&self) -> Option { + match &self.kind { + InputKind::File(path) if path != &PathBuf::from(text::DEV_STDIN) => { + path.canonicalize().ok() + } + InputKind::File(_) | InputKind::Stdin => { + if cfg!(unix) { + PathBuf::from(text::DEV_STDIN).canonicalize().ok() + } else { + None + } + } + } + } + + pub fn is_tailable(&self) -> bool { + match &self.kind { + InputKind::File(path) => path_is_tailable(path), + InputKind::Stdin => self.resolve().map_or(false, |path| path_is_tailable(&path)), + } + } +} + +impl Default for Input { + fn default() -> Self { + Self { + kind: InputKind::Stdin, + display_name: String::from(text::STDIN_HEADER), + } + } +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct HeaderPrinter { + verbose: bool, + first_header: bool, +} + +impl HeaderPrinter { + pub fn new(verbose: bool, first_header: bool) -> Self { + Self { + verbose, + first_header, + } + } + + pub fn print_input(&mut self, input: &Input) { + self.print(input.display_name.as_str()); + } + + pub fn print(&mut self, string: &str) { + if self.verbose { + println!( + "{}==> {} <==", + if self.first_header { "" } else { "\n" }, + string, + ); + self.first_header = false; + } + } +} + +#[derive(Debug, Clone)] +pub struct InputService { + pub inputs: VecDeque, + pub presume_input_pipe: bool, + pub header_printer: HeaderPrinter, +} + +impl InputService { + pub fn new(verbose: bool, presume_input_pipe: bool, inputs: VecDeque) -> Self { + Self { + inputs, + presume_input_pipe, + header_printer: HeaderPrinter::new(verbose, true), + } + } + + pub fn from(settings: &Settings) -> Self { + Self::new( + settings.verbose, + settings.presume_input_pipe, + settings.inputs.clone(), + ) + } + + pub fn has_stdin(&mut self) -> bool { + self.inputs.iter().any(|input| input.is_stdin()) + } + + pub fn has_only_stdin(&self) -> bool { + self.inputs.iter().all(|input| input.is_stdin()) + } + + pub fn print_header(&mut self, input: &Input) { + self.header_printer.print_input(input); + } +} + +pub trait FileExtTail { + #[allow(clippy::wrong_self_convention)] + fn is_seekable(&mut self, current_offset: u64) -> bool; +} + +impl FileExtTail for File { + /// Test if File is seekable. + /// Set the current position offset to `current_offset`. + fn is_seekable(&mut self, current_offset: u64) -> bool { + self.seek(SeekFrom::Current(0)).is_ok() + && self.seek(SeekFrom::End(0)).is_ok() + && self.seek(SeekFrom::Start(current_offset)).is_ok() + } +} + +pub trait MetadataExtTail { + fn is_tailable(&self) -> bool; + fn got_truncated(&self, other: &Metadata) -> UResult; + fn get_block_size(&self) -> u64; + fn file_id_eq(&self, other: &Metadata) -> bool; +} + +impl MetadataExtTail for Metadata { + fn is_tailable(&self) -> bool { + let ft = self.file_type(); + #[cfg(unix)] + { + ft.is_file() || ft.is_char_device() || ft.is_fifo() + } + #[cfg(not(unix))] + { + ft.is_file() + } + } + + /// Return true if the file was modified and is now shorter + fn got_truncated(&self, other: &Metadata) -> UResult { + Ok(other.len() < self.len() && other.modified()? != self.modified()?) + } + + fn get_block_size(&self) -> u64 { + #[cfg(unix)] + { + self.blocks() + } + #[cfg(not(unix))] + { + self.len() + } + } + + fn file_id_eq(&self, _other: &Metadata) -> bool { + #[cfg(unix)] + { + self.ino().eq(&_other.ino()) + } + #[cfg(windows)] + { + // use std::os::windows::prelude::*; + // if let Some(self_id) = self.file_index() { + // if let Some(other_id) = other.file_index() { + // + // return self_id.eq(&other_id); + // } + // } + false + } + } +} + +pub trait PathExtTail { + fn is_stdin(&self) -> bool; + fn is_orphan(&self) -> bool; + fn is_tailable(&self) -> bool; +} + +impl PathExtTail for Path { + fn is_stdin(&self) -> bool { + self.eq(Self::new(text::DASH)) + || self.eq(Self::new(text::DEV_STDIN)) + || self.eq(Self::new(text::STDIN_HEADER)) + } + + /// Return true if `path` does not have an existing parent directory + fn is_orphan(&self) -> bool { + !matches!(self.parent(), Some(parent) if parent.is_dir()) + } + + /// Return true if `path` is is a file type that can be tailed + fn is_tailable(&self) -> bool { + path_is_tailable(self) + } +} + +pub fn path_is_tailable(path: &Path) -> bool { + path.is_file() || path.exists() && path.metadata().map_or(false, |meta| meta.is_tailable()) +} + +#[inline] +pub fn stdin_is_bad_fd() -> bool { + // FIXME : Rust's stdlib is reopening fds as /dev/null + // see also: https://github.com/uutils/coreutils/issues/2873 + // (gnu/tests/tail-2/follow-stdin.sh fails because of this) + //#[cfg(unix)] + { + //platform::stdin_is_bad_fd() + } + //#[cfg(not(unix))] + false +} diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 29176ae5d..2a76522d3 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -7,7 +7,7 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle +// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle Signum // spell-checker:ignore (libs) kqueue // spell-checker:ignore (acronyms) // spell-checker:ignore (env/flags) @@ -23,461 +23,217 @@ extern crate clap; extern crate uucore; extern crate core; +pub mod args; pub mod chunks; +mod follow; mod parse; +mod paths; mod platform; -use crate::files::FileHandling; -use chunks::ReverseChunks; +pub mod text; -use clap::{Arg, Command, ValueSource}; -use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind}; +use same_file::Handle; use std::cmp::Ordering; -use std::collections::{HashMap, VecDeque}; -use std::ffi::OsString; -use std::fs::{File, Metadata}; +use std::fs::File; use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; -use std::sync::mpsc::{self, channel, Receiver}; -use std::time::Duration; + use uucore::display::Quotable; -use uucore::error::{ - get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError, UUsageError, -}; -use uucore::format_usage; -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::error::{get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError}; -#[cfg(unix)] -use std::os::unix::fs::MetadataExt; -#[cfg(unix)] -use std::os::unix::prelude::FileTypeExt; - -const ABOUT: &str = "\ - Print the last 10 lines of each FILE to standard output.\n\ - With more than one FILE, precede each with a header giving the file name.\n\ - With no FILE, or when FILE is -, read standard input.\n\ - \n\ - Mandatory arguments to long flags are mandatory for short flags too.\ - "; -const USAGE: &str = "{} [FLAG]... [FILE]..."; - -pub mod text { - pub static DASH: &str = "-"; - pub static DEV_STDIN: &str = "/dev/stdin"; - pub static STDIN_HEADER: &str = "standard input"; - pub static NO_FILES_REMAINING: &str = "no files remaining"; - pub static NO_SUCH_FILE: &str = "No such file or directory"; - pub static BECOME_INACCESSIBLE: &str = "has become inaccessible"; - pub static BAD_FD: &str = "Bad file descriptor"; - #[cfg(target_os = "linux")] - pub static BACKEND: &str = "inotify"; - #[cfg(all(unix, not(target_os = "linux")))] - pub static BACKEND: &str = "kqueue"; - #[cfg(target_os = "windows")] - pub static BACKEND: &str = "ReadDirectoryChanges"; -} - -pub mod options { - pub mod verbosity { - pub static QUIET: &str = "quiet"; - pub static VERBOSE: &str = "verbose"; - } - pub static BYTES: &str = "bytes"; - pub static FOLLOW: &str = "follow"; - pub static LINES: &str = "lines"; - pub static PID: &str = "pid"; - pub static SLEEP_INT: &str = "sleep-interval"; - pub static ZERO_TERM: &str = "zero-terminated"; - pub static DISABLE_INOTIFY_TERM: &str = "-disable-inotify"; // NOTE: three hyphens is correct - pub static USE_POLLING: &str = "use-polling"; - pub static RETRY: &str = "retry"; - pub static FOLLOW_RETRY: &str = "F"; - pub static MAX_UNCHANGED_STATS: &str = "max-unchanged-stats"; - pub static ARG_FILES: &str = "files"; - pub static PRESUME_INPUT_PIPE: &str = "-presume-input-pipe"; // NOTE: three hyphens is correct -} - -#[derive(Debug, PartialEq, Eq)] -enum FilterMode { - Bytes(u64), - Lines(u64, u8), // (number of lines, delimiter) -} - -impl Default for FilterMode { - fn default() -> Self { - Self::Lines(10, b'\n') - } -} - -#[derive(Debug, PartialEq, Eq)] -enum FollowMode { - Descriptor, - Name, -} - -#[derive(Debug, Default)] -pub struct Settings { - beginning: bool, - follow: Option, - max_unchanged_stats: u32, - mode: FilterMode, - paths: VecDeque, - pid: platform::Pid, - retry: bool, - sleep_sec: Duration, - use_polling: bool, - verbose: bool, - stdin_is_pipe_or_fifo: bool, - stdin_offset: u64, - stdin_redirect: PathBuf, -} - -impl Settings { - pub fn from(matches: &clap::ArgMatches) -> UResult { - let mut settings: Self = Self { - sleep_sec: Duration::from_secs_f32(1.0), - max_unchanged_stats: 5, - ..Default::default() - }; - - settings.follow = if matches.contains_id(options::FOLLOW_RETRY) { - Some(FollowMode::Name) - } else if matches.value_source(options::FOLLOW) != Some(ValueSource::CommandLine) { - None - } else if matches.value_of(options::FOLLOW) == Some("name") { - Some(FollowMode::Name) - } else { - Some(FollowMode::Descriptor) - }; - - if let Some(s) = matches.value_of(options::SLEEP_INT) { - settings.sleep_sec = match s.parse::() { - Ok(s) => Duration::from_secs_f32(s), - Err(_) => { - return Err(UUsageError::new( - 1, - format!("invalid number of seconds: {}", s.quote()), - )) - } - } - } - - settings.use_polling = matches.contains_id(options::USE_POLLING); - - if let Some(s) = matches.value_of(options::MAX_UNCHANGED_STATS) { - settings.max_unchanged_stats = match s.parse::() { - Ok(s) => s, - Err(_) => { - // TODO: [2021-10; jhscheer] add test for this - return Err(UUsageError::new( - 1, - format!( - "invalid maximum number of unchanged stats between opens: {}", - s.quote() - ), - )); - } - } - } - - if let Some(pid_str) = matches.value_of(options::PID) { - match pid_str.parse() { - Ok(pid) => { - // NOTE: on unix platform::Pid is i32, on windows platform::Pid is u32 - #[cfg(unix)] - if pid < 0 { - // NOTE: tail only accepts an unsigned pid - return Err(USimpleError::new( - 1, - format!("invalid PID: {}", pid_str.quote()), - )); - } - settings.pid = pid; - if settings.follow.is_none() { - show_warning!("PID ignored; --pid=PID is useful only when following"); - } - if !platform::supports_pid_checks(settings.pid) { - show_warning!("--pid=PID is not supported on this system"); - settings.pid = 0; - } - } - Err(e) => { - return Err(USimpleError::new( - 1, - format!("invalid PID: {}: {}", pid_str.quote(), e), - )); - } - } - } - - let mut starts_with_plus = false; // support for legacy format (+0) - let mode_and_beginning = if let Some(arg) = matches.value_of(options::BYTES) { - starts_with_plus = arg.starts_with('+'); - match parse_num(arg) { - Ok((n, beginning)) => (FilterMode::Bytes(n), beginning), - Err(e) => { - return Err(UUsageError::new( - 1, - format!("invalid number of bytes: {}", e), - )) - } - } - } else if let Some(arg) = matches.value_of(options::LINES) { - starts_with_plus = arg.starts_with('+'); - match parse_num(arg) { - Ok((n, beginning)) => (FilterMode::Lines(n, b'\n'), beginning), - Err(e) => { - return Err(UUsageError::new( - 1, - format!("invalid number of lines: {}", e), - )) - } - } - } else { - (FilterMode::default(), false) - }; - settings.mode = mode_and_beginning.0; - settings.beginning = mode_and_beginning.1; - - // Mimic GNU's tail for -[nc]0 without -f and exit immediately - if settings.follow.is_none() && !starts_with_plus && { - if let FilterMode::Lines(l, _) = settings.mode { - l == 0 - } else { - settings.mode == FilterMode::Bytes(0) - } - } { - std::process::exit(0) - } - - settings.retry = - matches.contains_id(options::RETRY) || matches.contains_id(options::FOLLOW_RETRY); - - if settings.retry && settings.follow.is_none() { - show_warning!("--retry ignored; --retry is useful only when following"); - } - - if matches.contains_id(options::ZERO_TERM) { - if let FilterMode::Lines(count, _) = settings.mode { - settings.mode = FilterMode::Lines(count, 0); - } - } - - settings.stdin_is_pipe_or_fifo = matches.contains_id(options::PRESUME_INPUT_PIPE); - - settings.paths = matches - .get_many::(options::ARG_FILES) - .map(|v| v.map(PathBuf::from).collect()) - .unwrap_or_default(); - - settings.verbose = (matches.contains_id(options::verbosity::VERBOSE) - || settings.paths.len() > 1) - && !matches.contains_id(options::verbosity::QUIET); - - Ok(settings) - } - - fn follow_descriptor(&self) -> bool { - self.follow == Some(FollowMode::Descriptor) - } - - fn follow_name(&self) -> bool { - self.follow == Some(FollowMode::Name) - } - - fn follow_descriptor_retry(&self) -> bool { - self.follow_descriptor() && self.retry - } - - fn follow_name_retry(&self) -> bool { - self.follow_name() && self.retry - } -} +pub use args::uu_app; +use args::{parse_args, FilterMode, Settings, Signum}; +use chunks::ReverseChunks; +use follow::WatcherService; +use paths::{FileExtTail, Input, InputKind, InputService, MetadataExtTail}; #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uu_app().try_get_matches_from(arg_iterate(args)?)?; - let mut settings = Settings::from(&matches)?; - - // skip expensive call to fstat if PRESUME_INPUT_PIPE is selected - if !settings.stdin_is_pipe_or_fifo { - settings.stdin_is_pipe_or_fifo = stdin_is_pipe_or_fifo(); - } - - uu_tail(settings) + let settings = parse_args(args)?; + uu_tail(&settings) } -fn uu_tail(mut settings: Settings) -> UResult<()> { - let dash = PathBuf::from(text::DASH); - +fn uu_tail(settings: &Settings) -> UResult<()> { // Mimic GNU's tail for `tail -F` and exit immediately - if (settings.paths.is_empty() || settings.paths.contains(&dash)) && settings.follow_name() { + let mut input_service = InputService::from(settings); + let mut watcher_service = WatcherService::from(settings); + + if input_service.has_stdin() && watcher_service.follow_name() { return Err(USimpleError::new( 1, format!("cannot follow {} by name", text::DASH.quote()), )); } - settings.stdin_redirect = dash.handle_redirect(); - if cfg!(unix) && settings.stdin_is_pipe_or_fifo { - // Save the current seek position/offset of a stdin redirected file. - // This is needed to pass "gnu/tests/tail-2/start-middle.sh" - use same_file::Handle; - if let Ok(mut stdin_handle) = Handle::stdin() { - if let Ok(offset) = stdin_handle.as_file_mut().seek(SeekFrom::Current(0)) { - settings.stdin_offset = offset; - } - } - } - - // add '-' to paths - if !settings.paths.contains(&dash) && settings.stdin_is_pipe_or_fifo - || settings.paths.is_empty() && !settings.stdin_is_pipe_or_fifo - { - settings.paths.push_front(dash); - } - - // TODO: is there a better way to check for a readable stdin? - let mut buf = [0; 0]; // empty buffer to check if stdin().read().is_err() - let stdin_read_possible = settings.stdin_is_pipe_or_fifo && stdin().read(&mut buf).is_ok(); - - let mut first_header = true; - let mut files = FileHandling::with_capacity(settings.paths.len()); - let mut orphans = Vec::new(); - - let mut watcher_rx = if settings.follow.is_some() { - let (watcher, rx) = start_watcher_thread(&mut settings)?; - Some((watcher, rx)) - } else { - None - }; - - // Iterate user provided `paths` and add them to Watcher. - if let Some((ref mut watcher, _)) = watcher_rx { - for path in &settings.paths { - let mut path = if path.is_stdin() { - settings.stdin_redirect.to_owned() - } else { - path.to_owned() - }; - if path.is_stdin() { - continue; - } - #[cfg(all(unix, not(target_os = "linux")))] - if !path.is_file() { - continue; - } - if path.is_relative() { - path = std::env::current_dir()?.join(&path); - } - - if path.is_tailable() { - // Add existing regular files to `Watcher` (InotifyWatcher). - watcher.watch_with_parent(&path)?; - } else if !path.is_orphan() { - // If `path` is not a tailable file, add its parent to `Watcher`. - watcher - .watch(path.parent().unwrap(), RecursiveMode::NonRecursive) - .unwrap(); - } else { - // If there is no parent, add `path` to `orphans`. - orphans.push(path.to_owned()); - } - } - } - + watcher_service.start(settings)?; // Do an initial tail print of each path's content. // Add `path` and `reader` to `files` map if `--follow` is selected. - for path in &settings.paths { - let display_name = if cfg!(unix) && path.is_stdin() { - PathBuf::from(text::STDIN_HEADER) - } else { - path.to_owned() - }; - let path = if path.is_stdin() { - settings.stdin_redirect.to_owned() - } else { - path.to_owned() - }; - let path_is_tailable = path.is_tailable(); - - if !path.is_stdin() && !path_is_tailable { - if settings.follow_descriptor_retry() { - show_warning!("--retry only effective for the initial open"); + for input in &input_service.inputs.clone() { + match input.kind() { + InputKind::File(path) if cfg!(not(unix)) || path != &PathBuf::from(text::DEV_STDIN) => { + tail_file( + settings, + &mut input_service, + input, + path, + &mut watcher_service, + 0, + )?; } - - if !path.exists() && !settings.stdin_is_pipe_or_fifo { - set_exit_code(1); - show_error!( - "cannot open {} for reading: {}", - display_name.quote(), - text::NO_SUCH_FILE - ); - } else if path.is_dir() || display_name.is_stdin() && !stdin_read_possible { - if settings.verbose { - files.print_header(&display_name, !first_header); - first_header = false; - } - let err_msg = "Is a directory".to_string(); - - // NOTE: On macOS path.is_dir() can be false for directories - // if it was a redirect, e.g. `$ tail < DIR` - // if !path.is_dir() { - // TODO: match against ErrorKind if unstable - // library feature "io_error_more" becomes stable - // if let Err(e) = stdin().read(&mut buf) { - // if e.kind() != std::io::ErrorKind::IsADirectory { - // err_msg = e.message.to_string(); - // } - // } - // } - - set_exit_code(1); - show_error!("error reading {}: {}", display_name.quote(), err_msg); - if settings.follow.is_some() { - let msg = if !settings.retry { - "; giving up on this name" - } else { - "" - }; - show_error!( - "{}: cannot follow end of this type of file{}", - display_name.display(), - msg - ); - } - if !(settings.follow_name_retry()) { - // skip directory if not retry - continue; - } - } else { - // TODO: [2021-10; jhscheer] how to handle block device or socket? - todo!(); + // File points to /dev/stdin here + InputKind::File(_) | InputKind::Stdin => { + tail_stdin(settings, &mut input_service, input, &mut watcher_service)?; } } + } + if settings.follow.is_some() { + /* + POSIX specification regarding tail -f + If the input file is a regular file or if the file operand specifies a FIFO, do not + terminate after the last line of the input file has been copied, but read and copy + further bytes from the input file when they become available. If no file operand is + specified and standard input is a pipe or FIFO, the -f option shall be ignored. If + the input file is not a FIFO, pipe, or regular file, it is unspecified whether or + not the -f option shall be ignored. + */ + + if !input_service.has_only_stdin() { + follow::follow(watcher_service, settings)?; + } + } + + if get_exit_code() > 0 && paths::stdin_is_bad_fd() { + show_error!("-: {}", text::BAD_FD); + } + + Ok(()) +} + +fn tail_file( + settings: &Settings, + input_service: &mut InputService, + input: &Input, + path: &Path, + watcher_service: &mut WatcherService, + offset: u64, +) -> UResult<()> { + if watcher_service.follow_descriptor_retry() { + show_warning!("--retry only effective for the initial open"); + } + + if !path.exists() { + set_exit_code(1); + show_error!( + "cannot open '{}' for reading: {}", + input.display_name, + text::NO_SUCH_FILE + ); + watcher_service.add_bad_path(path, input.display_name.as_str(), false)?; + } else if path.is_dir() { + set_exit_code(1); + + input_service.print_header(input); + let err_msg = "Is a directory".to_string(); + + show_error!("error reading '{}': {}", input.display_name, err_msg); + if settings.follow.is_some() { + let msg = if !settings.retry { + "; giving up on this name" + } else { + "" + }; + show_error!( + "{}: cannot follow end of this type of file{}", + input.display_name, + msg + ); + } + if !(watcher_service.follow_name_retry()) { + // skip directory if not retry + return Ok(()); + } + watcher_service.add_bad_path(path, input.display_name.as_str(), false)?; + } else if input.is_tailable() { let metadata = path.metadata().ok(); - - if display_name.is_stdin() && !path.is_file() { - if settings.verbose { - files.print_header(&display_name, !first_header); - first_header = false; - } - - let mut reader = BufReader::new(stdin()); - if !stdin_is_bad_fd() { - unbounded_tail(&mut reader, &settings)?; - if settings.follow_descriptor() { - // Insert `stdin` into `files.map` - files.insert( - &path, - PathData { - reader: Some(Box::new(reader)), - metadata: None, - display_name, - }, - true, - ); + match File::open(&path) { + Ok(mut file) => { + input_service.print_header(input); + let mut reader; + if file.is_seekable(if input.is_stdin() { offset } else { 0 }) + && metadata.as_ref().unwrap().get_block_size() > 0 + { + bounded_tail(&mut file, settings); + reader = BufReader::new(file); + } else { + reader = BufReader::new(file); + unbounded_tail(&mut reader, settings)?; } + watcher_service.add_path( + path, + input.display_name.as_str(), + Some(Box::new(reader)), + true, + )?; + } + Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { + watcher_service.add_bad_path(path, input.display_name.as_str(), false)?; + show!(e.map_err_context(|| { + format!("cannot open '{}' for reading", input.display_name) + })); + } + Err(e) => { + watcher_service.add_bad_path(path, input.display_name.as_str(), false)?; + return Err(e.map_err_context(|| { + format!("cannot open '{}' for reading", input.display_name) + })); + } + } + } else { + watcher_service.add_bad_path(path, input.display_name.as_str(), false)?; + } + + Ok(()) +} + +fn tail_stdin( + settings: &Settings, + input_service: &mut InputService, + input: &Input, + watcher_service: &mut WatcherService, +) -> UResult<()> { + match input.resolve() { + // fifo + Some(path) => { + let mut stdin_offset = 0; + if cfg!(unix) { + // Save the current seek position/offset of a stdin redirected file. + // This is needed to pass "gnu/tests/tail-2/start-middle.sh" + if let Ok(mut stdin_handle) = Handle::stdin() { + if let Ok(offset) = stdin_handle.as_file_mut().seek(SeekFrom::Current(0)) { + stdin_offset = offset; + } + } + } + tail_file( + settings, + input_service, + input, + &path, + watcher_service, + stdin_offset, + )?; + } + // pipe + None => { + input_service.print_header(input); + if !paths::stdin_is_bad_fd() { + let mut reader = BufReader::new(stdin()); + unbounded_tail(&mut reader, settings)?; + watcher_service.add_stdin( + input.display_name.as_str(), + Some(Box::new(reader)), + true, + )?; } else { set_exit_code(1); show_error!( @@ -493,818 +249,12 @@ fn uu_tail(mut settings: Settings) -> UResult<()> { ); } } - } else if path_is_tailable { - match File::open(&path) { - Ok(mut file) => { - if settings.verbose { - files.print_header(&display_name, !first_header); - first_header = false; - } - - let mut reader; - if file.is_seekable(if display_name.is_stdin() { - settings.stdin_offset - } else { - 0 - }) && metadata.as_ref().unwrap().get_block_size() > 0 - { - bounded_tail(&mut file, &settings); - reader = BufReader::new(file); - } else { - reader = BufReader::new(file); - unbounded_tail(&mut reader, &settings)?; - } - if settings.follow.is_some() { - // Insert existing/file `path` into `files.map` - files.insert( - &path, - PathData { - reader: Some(Box::new(reader)), - metadata, - display_name, - }, - true, - ); - } - } - Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { - show!(e.map_err_context(|| { - format!("cannot open {} for reading", display_name.quote()) - })); - } - Err(e) => { - return Err(e.map_err_context(|| { - format!("cannot open {} for reading", display_name.quote()) - })); - } - } - } else if settings.retry && settings.follow.is_some() { - let path = if path.is_relative() { - std::env::current_dir()?.join(&path) - } else { - path.to_owned() - }; - // Insert non-is_tailable() paths into `files.map` - files.insert( - &path, - PathData { - reader: None, - metadata, - display_name, - }, - false, - ); } - } - - if settings.follow.is_some() { - /* - POSIX specification regarding tail -f - If the input file is a regular file or if the file operand specifies a FIFO, do not - terminate after the last line of the input file has been copied, but read and copy - further bytes from the input file when they become available. If no file operand is - specified and standard input is a pipe or FIFO, the -f option shall be ignored. If - the input file is not a FIFO, pipe, or regular file, it is unspecified whether or - not the -f option shall be ignored. - */ - if files.no_files_remaining(&settings) { - if !files.only_stdin_remaining() { - show_error!("{}", text::NO_FILES_REMAINING); - } - } else if !(settings.stdin_is_pipe_or_fifo && settings.paths.len() == 1) { - follow(files, &settings, watcher_rx, orphans)?; - } - } - - if get_exit_code() > 0 && stdin_is_bad_fd() { - show_error!("-: {}", text::BAD_FD); - } + }; Ok(()) } -fn arg_iterate<'a>( - mut args: impl uucore::Args + 'a, -) -> Result + 'a>, Box<(dyn UError)>> { - // argv[0] is always present - let first = args.next().unwrap(); - if let Some(second) = args.next() { - if let Some(s) = second.to_str() { - match parse::parse_obsolete(s) { - Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))), - Some(Err(e)) => Err(UUsageError::new( - 1, - match e { - parse::ParseError::Syntax => format!("bad argument format: {}", s.quote()), - parse::ParseError::Overflow => format!( - "invalid argument: {} Value too large for defined datatype", - s.quote() - ), - }, - )), - None => Ok(Box::new(vec![first, second].into_iter().chain(args))), - } - } else { - Err(UUsageError::new(1, "bad argument encoding".to_owned())) - } - } else { - Ok(Box::new(vec![first].into_iter())) - } -} - -pub fn uu_app<'a>() -> Command<'a> { - #[cfg(target_os = "linux")] - pub static POLLING_HELP: &str = "Disable 'inotify' support and use polling instead"; - #[cfg(all(unix, not(target_os = "linux")))] - pub static POLLING_HELP: &str = "Disable 'kqueue' support and use polling instead"; - #[cfg(target_os = "windows")] - pub static POLLING_HELP: &str = - "Disable 'ReadDirectoryChanges' support and use polling instead"; - - Command::new(uucore::util_name()) - .version(crate_version!()) - .about(ABOUT) - .override_usage(format_usage(USAGE)) - .infer_long_args(true) - .arg( - Arg::new(options::BYTES) - .short('c') - .long(options::BYTES) - .takes_value(true) - .allow_hyphen_values(true) - .overrides_with_all(&[options::BYTES, options::LINES]) - .help("Number of bytes to print"), - ) - .arg( - Arg::new(options::FOLLOW) - .short('f') - .long(options::FOLLOW) - .default_value("descriptor") - .takes_value(true) - .min_values(0) - .max_values(1) - .require_equals(true) - .value_parser(["descriptor", "name"]) - .help("Print the file as it grows"), - ) - .arg( - Arg::new(options::LINES) - .short('n') - .long(options::LINES) - .takes_value(true) - .allow_hyphen_values(true) - .overrides_with_all(&[options::BYTES, options::LINES]) - .help("Number of lines to print"), - ) - .arg( - Arg::new(options::PID) - .long(options::PID) - .takes_value(true) - .help("With -f, terminate after process ID, PID dies"), - ) - .arg( - Arg::new(options::verbosity::QUIET) - .short('q') - .long(options::verbosity::QUIET) - .visible_alias("silent") - .overrides_with_all(&[options::verbosity::QUIET, options::verbosity::VERBOSE]) - .help("Never output headers giving file names"), - ) - .arg( - Arg::new(options::SLEEP_INT) - .short('s') - .takes_value(true) - .long(options::SLEEP_INT) - .help("Number of seconds to sleep between polling the file when running with -f"), - ) - .arg( - Arg::new(options::MAX_UNCHANGED_STATS) - .takes_value(true) - .long(options::MAX_UNCHANGED_STATS) - .help( - "Reopen a FILE which has not changed size after N (default 5) iterations \ - to see if it has been unlinked or renamed (this is the usual case of rotated \ - log files); This option is meaningful only when polling \ - (i.e., with --use-polling) and when --follow=name", - ), - ) - .arg( - Arg::new(options::verbosity::VERBOSE) - .short('v') - .long(options::verbosity::VERBOSE) - .overrides_with_all(&[options::verbosity::QUIET, options::verbosity::VERBOSE]) - .help("Always output headers giving file names"), - ) - .arg( - Arg::new(options::ZERO_TERM) - .short('z') - .long(options::ZERO_TERM) - .help("Line delimiter is NUL, not newline"), - ) - .arg( - Arg::new(options::USE_POLLING) - .alias(options::DISABLE_INOTIFY_TERM) // NOTE: Used by GNU's test suite - .alias("dis") // NOTE: Used by GNU's test suite - .long(options::USE_POLLING) - .help(POLLING_HELP), - ) - .arg( - Arg::new(options::RETRY) - .long(options::RETRY) - .help("Keep trying to open a file if it is inaccessible"), - ) - .arg( - Arg::new(options::FOLLOW_RETRY) - .short('F') - .help("Same as --follow=name --retry") - .overrides_with_all(&[options::RETRY, options::FOLLOW]), - ) - .arg( - Arg::new(options::PRESUME_INPUT_PIPE) - .long(options::PRESUME_INPUT_PIPE) - .alias(options::PRESUME_INPUT_PIPE) - .hide(true), - ) - .arg( - Arg::new(options::ARG_FILES) - .multiple_occurrences(true) - .takes_value(true) - .min_values(1) - .value_hint(clap::ValueHint::FilePath), - ) -} - -type WatcherRx = ( - Box<(dyn Watcher)>, - Receiver>, -); - -fn follow( - mut files: FileHandling, - settings: &Settings, - watcher_rx: Option, - mut orphans: Vec, -) -> UResult<()> { - let (mut watcher, rx) = watcher_rx.unwrap(); - let mut process = platform::ProcessChecker::new(settings.pid); - - // TODO: [2021-10; jhscheer] - let mut _event_counter = 0; - let mut _timeout_counter = 0; - - // main follow loop - loop { - let mut _read_some = false; - - // If `--pid=p`, tail checks whether process p - // is alive at least every `--sleep-interval=N` seconds - if settings.follow.is_some() && settings.pid != 0 && process.is_dead() { - // p is dead, tail will also terminate - break; - } - - // For `-F` we need to poll if an orphan path becomes available during runtime. - // If a path becomes an orphan during runtime, it will be added to orphans. - // To be able to differentiate between the cases of test_retry8 and test_retry9, - // here paths will not be removed from orphans if the path becomes available. - if settings.follow_name_retry() { - for new_path in &orphans { - if new_path.exists() { - let pd = files.get(new_path); - let md = new_path.metadata().unwrap(); - if md.is_tailable() && pd.reader.is_none() { - show_error!( - "{} has appeared; following new file", - pd.display_name.quote() - ); - files.update_metadata(new_path, Some(md)); - files.update_reader(new_path)?; - _read_some = files.tail_file(new_path, settings.verbose)?; - watcher.watch_with_parent(new_path)?; - } - } - } - } - - // With -f, sleep for approximately N seconds (default 1.0) between iterations; - // We wake up if Notify sends an Event or if we wait more than `sleep_sec`. - let rx_result = rx.recv_timeout(settings.sleep_sec); - if rx_result.is_ok() { - _event_counter += 1; - _timeout_counter = 0; - } - - let mut paths = vec![]; // Paths worth checking for new content to print - match rx_result { - Ok(Ok(event)) => { - if let Some(event_path) = event.paths.first() { - if files.contains_key(event_path) { - // Handle Event if it is about a path that we are monitoring - paths = - handle_event(&event, &mut files, settings, &mut watcher, &mut orphans)?; - } - } - } - Ok(Err(notify::Error { - kind: notify::ErrorKind::Io(ref e), - paths, - })) if e.kind() == std::io::ErrorKind::NotFound => { - if let Some(event_path) = paths.first() { - if files.contains_key(event_path) { - let _ = watcher.unwatch(event_path); - } - } - } - Ok(Err(notify::Error { - kind: notify::ErrorKind::MaxFilesWatch, - .. - })) => { - return Err(USimpleError::new( - 1, - format!("{} resources exhausted", text::BACKEND), - )) - } - Ok(Err(e)) => return Err(USimpleError::new(1, format!("NotifyError: {}", e))), - Err(mpsc::RecvTimeoutError::Timeout) => { - _timeout_counter += 1; - } - Err(e) => return Err(USimpleError::new(1, format!("RecvTimeoutError: {}", e))), - } - - if settings.use_polling && settings.follow.is_some() { - // Consider all files to potentially have new content. - // This is a workaround because `Notify::PollWatcher` - // does not recognize the "renaming" of files. - paths = files.keys().cloned().collect::>(); - } - - // main print loop - for path in &paths { - _read_some = files.tail_file(path, settings.verbose)?; - } - - if _timeout_counter == settings.max_unchanged_stats { - /* - TODO: [2021-10; jhscheer] implement timeout_counter for each file. - ā€˜--max-unchanged-stats=nā€™ - When tailing a file by name, if there have been n (default n=5) consecutive iterations - for which the file has not changed, then open/fstat the file to determine if that file - name is still associated with the same device/inode-number pair as before. When - following a log file that is rotated, this is approximately the number of seconds - between when tail prints the last pre-rotation lines and when it prints the lines that - have accumulated in the new log file. This option is meaningful only when polling - (i.e., without inotify) and when following by name. - */ - } - } - Ok(()) -} - -fn start_watcher_thread(settings: &mut Settings) -> Result> { - let (tx, rx) = channel(); - - /* - Watcher is implemented per platform using the best implementation available on that - platform. In addition to such event driven implementations, a polling implementation - is also provided that should work on any platform. - Linux / Android: inotify - macOS: FSEvents / kqueue - Windows: ReadDirectoryChangesWatcher - FreeBSD / NetBSD / OpenBSD / DragonflyBSD: kqueue - Fallback: polling every n seconds - - NOTE: - We force the use of kqueue with: features=["macos_kqueue"]. - On macOS only `kqueue` is suitable for our use case because `FSEvents` - waits for file close util it delivers a modify event. See: - https://github.com/notify-rs/notify/issues/240 - */ - - let watcher: Box; - let watcher_config = notify::Config::default() - .with_poll_interval(settings.sleep_sec) - /* - NOTE: By enabling compare_contents, performance will be significantly impacted - as all files will need to be read and hashed at each `poll_interval`. - However, this is necessary to pass: "gnu/tests/tail-2/F-vs-rename.sh" - */ - .with_compare_contents(true); - if settings.use_polling || RecommendedWatcher::kind() == WatcherKind::PollWatcher { - settings.use_polling = true; // We have to use polling because there's no supported backend - watcher = Box::new(notify::PollWatcher::new(tx, watcher_config).unwrap()); - } else { - let tx_clone = tx.clone(); - match notify::RecommendedWatcher::new(tx, notify::Config::default()) { - Ok(w) => watcher = Box::new(w), - Err(e) if e.to_string().starts_with("Too many open files") => { - /* - NOTE: This ErrorKind is `Uncategorized`, but it is not recommended - to match an error against `Uncategorized` - NOTE: Could be tested with decreasing `max_user_instances`, e.g.: - `sudo sysctl fs.inotify.max_user_instances=64` - */ - show_error!( - "{} cannot be used, reverting to polling: Too many open files", - text::BACKEND - ); - set_exit_code(1); - settings.use_polling = true; - watcher = Box::new(notify::PollWatcher::new(tx_clone, watcher_config).unwrap()); - } - Err(e) => return Err(USimpleError::new(1, e.to_string())), - }; - } - Ok((watcher, rx)) -} - -fn handle_event( - event: ¬ify::Event, - files: &mut FileHandling, - settings: &Settings, - watcher: &mut Box, - orphans: &mut Vec, -) -> UResult> { - use notify::event::*; - - let event_path = event.paths.first().unwrap(); - let display_name = files.get_display_name(event_path); - let mut paths: Vec = vec![]; - - match event.kind { - EventKind::Modify(ModifyKind::Metadata(MetadataKind::Any | MetadataKind::WriteTime)) - // | EventKind::Access(AccessKind::Close(AccessMode::Write)) - | EventKind::Create(CreateKind::File | CreateKind::Folder | CreateKind::Any) - | EventKind::Modify(ModifyKind::Data(DataChange::Any)) - | EventKind::Modify(ModifyKind::Name(RenameMode::To)) => { - if let Ok(new_md) = event_path.metadata() { - let is_tailable = new_md.is_tailable(); - let pd = files.get(event_path); - if let Some(old_md) = &pd.metadata { - if is_tailable { - // We resume tracking from the start of the file, - // assuming it has been truncated to 0. This mimics GNU's `tail` - // behavior and is the usual truncation operation for log files. - if !old_md.is_tailable() { - show_error!( "{} has become accessible", display_name.quote()); - files.update_reader(event_path)?; - } else if pd.reader.is_none() { - show_error!( "{} has appeared; following new file", display_name.quote()); - files.update_reader(event_path)?; - } else if event.kind == EventKind::Modify(ModifyKind::Name(RenameMode::To)) - || (settings.use_polling - && !old_md.file_id_eq(&new_md)) { - show_error!( "{} has been replaced; following new file", display_name.quote()); - files.update_reader(event_path)?; - } else if old_md.got_truncated(&new_md)? { - show_error!("{}: file truncated", display_name.display()); - files.update_reader(event_path)?; - } - paths.push(event_path.to_owned()); - } else if !is_tailable && old_md.is_tailable() { - if pd.reader.is_some() { - files.reset_reader(event_path); - } else { - show_error!( - "{} has been replaced with an untailable file", - display_name.quote() - ); - } - } - } else if is_tailable { - show_error!( "{} has appeared; following new file", display_name.quote()); - files.update_reader(event_path)?; - paths.push(event_path.to_owned()); - } else if settings.retry { - if settings.follow_descriptor() { - show_error!( - "{} has been replaced with an untailable file; giving up on this name", - display_name.quote() - ); - let _ = watcher.unwatch(event_path); - files.remove(event_path); - if files.no_files_remaining(settings) { - return Err(USimpleError::new(1, text::NO_FILES_REMAINING)); - } - } else { - show_error!( - "{} has been replaced with an untailable file", - display_name.quote() - ); - } - } - files.update_metadata(event_path, Some(new_md)); - } - } - EventKind::Remove(RemoveKind::File | RemoveKind::Any) - // | EventKind::Modify(ModifyKind::Name(RenameMode::Any)) - | EventKind::Modify(ModifyKind::Name(RenameMode::From)) => { - if settings.follow_name() { - if settings.retry { - if let Some(old_md) = files.get_mut_metadata(event_path) { - if old_md.is_tailable() && files.get(event_path).reader.is_some() { - show_error!( - "{} {}: {}", - display_name.quote(), - text::BECOME_INACCESSIBLE, - text::NO_SUCH_FILE - ); - } - } - if event_path.is_orphan() && !orphans.contains(event_path) { - show_error!("directory containing watched file was removed"); - show_error!( - "{} cannot be used, reverting to polling", - text::BACKEND - ); - orphans.push(event_path.to_owned()); - let _ = watcher.unwatch(event_path); - } - } else { - show_error!("{}: {}", display_name.display(), text::NO_SUCH_FILE); - if !files.files_remaining() && settings.use_polling { - // NOTE: GNU's tail exits here for `---disable-inotify` - return Err(USimpleError::new(1, text::NO_FILES_REMAINING)); - } - } - files.reset_reader(event_path); - } else if settings.follow_descriptor_retry() { - // --retry only effective for the initial open - let _ = watcher.unwatch(event_path); - files.remove(event_path); - } else if settings.use_polling && event.kind == EventKind::Remove(RemoveKind::Any) { - /* - BUG: The watched file was removed. Since we're using Polling, this - could be a rename. We can't tell because `notify::PollWatcher` doesn't - recognize renames properly. - Ideally we want to call seek to offset 0 on the file handle. - But because we only have access to `PathData::reader` as `BufRead`, - we cannot seek to 0 with `BufReader::seek_relative`. - Also because we don't have the new name, we cannot work around this - by simply reopening the file. - */ - } - } - EventKind::Modify(ModifyKind::Name(RenameMode::Both)) => { - /* - NOTE: For `tail -f a`, keep tracking additions to b after `mv a b` - (gnu/tests/tail-2/descriptor-vs-rename.sh) - NOTE: The File/BufReader doesn't need to be updated. - However, we need to update our `files.map`. - This can only be done for inotify, because this EventKind does not - trigger for the PollWatcher. - BUG: As a result, there's a bug if polling is used: - $ tail -f file_a ---disable-inotify - $ mv file_a file_b - $ echo A >> file_b - $ echo A >> file_a - The last append to file_a is printed, however this shouldn't be because - after the "mv" tail should only follow "file_b". - TODO: [2022-05; jhscheer] add test for this bug - */ - - if settings.follow_descriptor() { - let new_path = event.paths.last().unwrap(); - paths.push(new_path.to_owned()); - // Remove old reader - let old_reader = files.remove(event_path).reader; - let reader = if old_reader.is_some() { - // Use old reader with the same file descriptor if there is one - old_reader - } else if let Ok(file) = File::open(&new_path) { - // Open new file tail from start - Some(Box::new(BufReader::new(file)) as Box) - } else { - // Probably file was renamed/moved or removed again - None - }; - // Add new reader but keep old display name - files.insert( - new_path, - PathData { - metadata: new_path.metadata().ok(), - reader, - display_name, // mimic GNU's tail and show old name in header - }, - files.get_last().unwrap() == event_path - ); - // Unwatch old path and watch new path - let _ = watcher.unwatch(event_path); - watcher.watch_with_parent(new_path)?; - } - } - _ => {} - } - Ok(paths) -} - -/// Data structure to keep a handle on the BufReader, Metadata -/// and the display_name (header_name) of files that are being followed. -pub struct PathData { - reader: Option>, - metadata: Option, - display_name: PathBuf, // the path as provided by user input, used for headers -} - -mod files { - use super::*; - use std::collections::hash_map::Keys; - - /// Data structure to keep a handle on files to follow. - /// `last` always holds the path/key of the last file that was printed from. - /// The keys of the HashMap can point to an existing file path (normal case), - /// or stdin ("-"), or to a non existing path (--retry). - /// For existing files, all keys in the HashMap are absolute Paths. - pub struct FileHandling { - map: HashMap, - last: Option, - } - - impl FileHandling { - /// Creates an empty `FileHandling` with the specified capacity - pub fn with_capacity(n: usize) -> Self { - Self { - map: HashMap::with_capacity(n), - last: None, - } - } - - /// Wrapper for HashMap::insert using Path::canonicalize - pub fn insert(&mut self, k: &Path, v: PathData, update_last: bool) { - let k = Self::canonicalize_path(k); - if update_last { - self.last = Some(k.to_owned()); - } - let _ = self.map.insert(k, v); - } - - /// Wrapper for HashMap::remove using Path::canonicalize - pub fn remove(&mut self, k: &Path) -> PathData { - self.map.remove(&Self::canonicalize_path(k)).unwrap() - } - - /// Wrapper for HashMap::get using Path::canonicalize - pub fn get(&self, k: &Path) -> &PathData { - self.map.get(&Self::canonicalize_path(k)).unwrap() - } - - /// Wrapper for HashMap::get_mut using Path::canonicalize - pub fn get_mut(&mut self, k: &Path) -> &mut PathData { - self.map.get_mut(&Self::canonicalize_path(k)).unwrap() - } - - /// Canonicalize `path` if it is not already an absolute path - fn canonicalize_path(path: &Path) -> PathBuf { - if path.is_relative() && !path.is_stdin() { - if let Ok(p) = path.canonicalize() { - return p; - } - } - path.to_owned() - } - - pub fn get_display_name(&self, path: &Path) -> PathBuf { - self.get(path).display_name.to_owned() - } - - pub fn get_mut_metadata(&mut self, path: &Path) -> Option<&Metadata> { - self.get_mut(path).metadata.as_ref() - } - - pub fn keys(&self) -> Keys { - self.map.keys() - } - - pub fn contains_key(&self, k: &Path) -> bool { - self.map.contains_key(k) - } - - pub fn get_last(&self) -> Option<&PathBuf> { - self.last.as_ref() - } - - /// Return true if there is only stdin remaining - pub fn only_stdin_remaining(&self) -> bool { - self.map.len() == 1 && (self.map.contains_key(Path::new(text::DASH))) - } - - /// Return true if there is at least one "tailable" path (or stdin) remaining - pub fn files_remaining(&self) -> bool { - for path in self.map.keys() { - if path.is_tailable() || path.is_stdin() { - return true; - } - } - false - } - - /// Returns true if there are no files remaining - pub fn no_files_remaining(&self, settings: &Settings) -> bool { - self.map.is_empty() || !self.files_remaining() && !settings.retry - } - - /// Set `reader` to None to indicate that `path` is not an existing file anymore. - pub fn reset_reader(&mut self, path: &Path) { - self.get_mut(path).reader = None; - } - - /// Reopen the file at the monitored `path` - pub fn update_reader(&mut self, path: &Path) -> UResult<()> { - /* - BUG: If it's not necessary to reopen a file, GNU's tail calls seek to offset 0. - However we can't call seek here because `BufRead` does not implement `Seek`. - As a workaround we always reopen the file even though this might not always - be necessary. - */ - self.get_mut(path) - .reader - .replace(Box::new(BufReader::new(File::open(&path)?))); - Ok(()) - } - - /// Reload metadata from `path`, or `metadata` - pub fn update_metadata(&mut self, path: &Path, metadata: Option) { - self.get_mut(path).metadata = if metadata.is_some() { - metadata - } else { - path.metadata().ok() - }; - } - - /// Read `path` from the current seek position forward - pub fn read_file(&mut self, path: &Path, buffer: &mut Vec) -> UResult { - let mut read_some = false; - let pd = self.get_mut(path).reader.as_mut(); - if let Some(reader) = pd { - loop { - match reader.read_until(b'\n', buffer) { - Ok(0) => break, - Ok(_) => { - read_some = true; - } - Err(err) => return Err(USimpleError::new(1, err.to_string())), - } - } - } - Ok(read_some) - } - - /// Print `buffer` to stdout - pub fn print_file(&self, buffer: &[u8]) -> UResult<()> { - let mut stdout = stdout(); - stdout - .write_all(buffer) - .map_err_context(|| String::from("write error"))?; - Ok(()) - } - - /// Read new data from `path` and print it to stdout - pub fn tail_file(&mut self, path: &Path, verbose: bool) -> UResult { - let mut buffer = vec![]; - let read_some = self.read_file(path, &mut buffer)?; - if read_some { - if self.needs_header(path, verbose) { - self.print_header(path, true); - } - self.print_file(&buffer)?; - - self.last.replace(path.to_owned()); - self.update_metadata(path, None); - } - Ok(read_some) - } - - /// Decide if printing `path` needs a header based on when it was last printed - pub fn needs_header(&self, path: &Path, verbose: bool) -> bool { - if verbose { - if let Some(ref last) = self.last { - return !last.eq(&path); - } else { - return true; - } - } - false - } - - /// Print header for `path` to stdout - pub fn print_header(&self, path: &Path, needs_newline: bool) { - println!( - "{}==> {} <==", - if needs_newline { "\n" } else { "" }, - self.display_name(path) - ); - } - - /// Wrapper for `PathData::display_name` - pub fn display_name(&self, path: &Path) -> String { - if let Some(path) = self.map.get(&Self::canonicalize_path(path)) { - path.display_name.display().to_string() - } else { - path.display().to_string() - } - } - } -} - /// Find the index after the given number of instances of a given byte. /// /// This function reads through a given reader until `num_delimiters` @@ -1430,25 +380,33 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) { /// being a nice performance win for very large files. fn bounded_tail(file: &mut File, settings: &Settings) { // Find the position in the file to start printing from. - match (&settings.mode, settings.beginning) { - (FilterMode::Lines(count, delimiter), false) => { + // dbg!("bounded"); + // dbg!(&settings.mode); + match &settings.mode { + FilterMode::Lines(Signum::Negative(count), delimiter) => { backwards_thru_file(file, *count, *delimiter); } - (FilterMode::Lines(count, delimiter), true) => { - let i = forwards_thru_file(file, (*count).max(1) - 1, *delimiter).unwrap(); + FilterMode::Lines(Signum::Positive(count), delimiter) if count > &1 => { + let i = forwards_thru_file(file, *count - 1, *delimiter).unwrap(); file.seek(SeekFrom::Start(i as u64)).unwrap(); } - (FilterMode::Bytes(count), false) => { + FilterMode::Lines(Signum::MinusZero, _) => { + return; + } + FilterMode::Bytes(Signum::Negative(count)) => { let len = file.seek(SeekFrom::End(0)).unwrap(); file.seek(SeekFrom::End(-((*count).min(len) as i64))) .unwrap(); } - (FilterMode::Bytes(count), true) => { + FilterMode::Bytes(Signum::Positive(count)) if count > &1 => { // GNU `tail` seems to index bytes and lines starting at 1, not // at 0. It seems to treat `+0` and `+1` as the same thing. - file.seek(SeekFrom::Start(((*count).max(1) - 1) as u64)) - .unwrap(); + file.seek(SeekFrom::Start(*count - 1)).unwrap(); } + FilterMode::Bytes(Signum::MinusZero) => { + return; + } + _ => {} } // Print the target section of the file. @@ -1460,14 +418,19 @@ fn bounded_tail(file: &mut File, settings: &Settings) { fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UResult<()> { let stdout = stdout(); let mut writer = BufWriter::new(stdout.lock()); - match (&settings.mode, settings.beginning) { - (FilterMode::Lines(count, sep), false) => { + // dbg!("unbounded"); + // dbg!(&settings.mode); + match &settings.mode { + FilterMode::Lines(Signum::Negative(count), sep) => { let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count); chunks.fill(reader)?; chunks.print(writer)?; } - (FilterMode::Lines(count, sep), true) => { - let mut num_skip = (*count).max(1) - 1; + FilterMode::Lines(Signum::PlusZero | Signum::Positive(1), _) => { + io::copy(reader, &mut writer)?; + } + FilterMode::Lines(Signum::Positive(count), sep) => { + let mut num_skip = *count - 1; let mut chunk = chunks::LinesChunk::new(*sep); while chunk.fill(reader)?.is_some() { let lines = chunk.get_lines() as u64; @@ -1482,13 +445,16 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UR io::copy(reader, &mut writer)?; } } - (FilterMode::Bytes(count), false) => { + FilterMode::Bytes(Signum::Negative(count)) => { let mut chunks = chunks::BytesChunkBuffer::new(*count); chunks.fill(reader)?; chunks.print(writer)?; } - (FilterMode::Bytes(count), true) => { - let mut num_skip = (*count).max(1) - 1; + FilterMode::Bytes(Signum::PlusZero | Signum::Positive(1)) => { + io::copy(reader, &mut writer)?; + } + FilterMode::Bytes(Signum::Positive(count)) => { + let mut num_skip = *count - 1; let mut chunk = chunks::BytesChunk::new(); loop { if let Some(bytes) = chunk.fill(reader)? { @@ -1510,204 +476,11 @@ fn unbounded_tail(reader: &mut BufReader, settings: &Settings) -> UR io::copy(reader, &mut writer)?; } + _ => {} } Ok(()) } -fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> { - let mut size_string = src.trim(); - let mut starting_with = false; - - if let Some(c) = size_string.chars().next() { - if c == '+' || c == '-' { - // tail: '-' is not documented (8.32 man pages) - size_string = &size_string[1..]; - if c == '+' { - starting_with = true; - } - } - } else { - return Err(ParseSizeError::ParseFailure(src.to_string())); - } - - parse_size(size_string).map(|n| (n, starting_with)) -} - -pub fn stdin_is_pipe_or_fifo() -> bool { - #[cfg(unix)] - { - platform::stdin_is_pipe_or_fifo() - } - #[cfg(windows)] - { - winapi_util::file::typ(winapi_util::HandleRef::stdin()) - .map(|t| t.is_disk() || t.is_pipe()) - .unwrap_or(false) - } -} -#[inline] -pub fn stdin_is_bad_fd() -> bool { - // FIXME : Rust's stdlib is reopening fds as /dev/null - // see also: https://github.com/uutils/coreutils/issues/2873 - // (gnu/tests/tail-2/follow-stdin.sh fails because of this) - //#[cfg(unix)] - { - //platform::stdin_is_bad_fd() - } - //#[cfg(not(unix))] - false -} - -trait FileExtTail { - #[allow(clippy::wrong_self_convention)] - fn is_seekable(&mut self, current_offset: u64) -> bool; -} - -impl FileExtTail for File { - /// Test if File is seekable. - /// Set the current position offset to `current_offset`. - fn is_seekable(&mut self, current_offset: u64) -> bool { - self.seek(SeekFrom::Current(0)).is_ok() - && self.seek(SeekFrom::End(0)).is_ok() - && self.seek(SeekFrom::Start(current_offset)).is_ok() - } -} - -trait MetadataExtTail { - fn is_tailable(&self) -> bool; - fn got_truncated(&self, other: &Metadata) -> Result>; - fn get_block_size(&self) -> u64; - fn file_id_eq(&self, other: &Metadata) -> bool; -} - -impl MetadataExtTail for Metadata { - fn is_tailable(&self) -> bool { - let ft = self.file_type(); - #[cfg(unix)] - { - ft.is_file() || ft.is_char_device() || ft.is_fifo() - } - #[cfg(not(unix))] - { - ft.is_file() - } - } - - /// Return true if the file was modified and is now shorter - fn got_truncated(&self, other: &Metadata) -> Result> { - Ok(other.len() < self.len() && other.modified()? != self.modified()?) - } - - fn get_block_size(&self) -> u64 { - #[cfg(unix)] - { - self.blocks() - } - #[cfg(not(unix))] - { - self.len() - } - } - - fn file_id_eq(&self, _other: &Metadata) -> bool { - #[cfg(unix)] - { - self.ino().eq(&_other.ino()) - } - #[cfg(windows)] - { - // TODO: `file_index` requires unstable library feature `windows_by_handle` - // use std::os::windows::prelude::*; - // if let Some(self_id) = self.file_index() { - // if let Some(other_id) = other.file_index() { - // // TODO: not sure this is the equivalent of comparing inode numbers - // return self_id.eq(&other_id); - // } - // } - false - } - } -} - -trait PathExtTail { - fn is_stdin(&self) -> bool; - fn is_orphan(&self) -> bool; - fn is_tailable(&self) -> bool; - fn handle_redirect(&self) -> PathBuf; -} - -impl PathExtTail for Path { - fn is_stdin(&self) -> bool { - self.eq(Self::new(text::DASH)) - || self.eq(Self::new(text::DEV_STDIN)) - || self.eq(Self::new(text::STDIN_HEADER)) - } - - /// Return true if `path` does not have an existing parent directory - fn is_orphan(&self) -> bool { - !matches!(self.parent(), Some(parent) if parent.is_dir()) - } - - /// Return true if `path` is is a file type that can be tailed - fn is_tailable(&self) -> bool { - self.is_file() || self.exists() && self.metadata().unwrap().is_tailable() - } - /// Workaround to handle redirects, e.g. `touch f && tail -f - < f` - fn handle_redirect(&self) -> PathBuf { - if cfg!(unix) && self.is_stdin() { - if let Ok(p) = Self::new(text::DEV_STDIN).canonicalize() { - return p; - } else { - return PathBuf::from(text::DEV_STDIN); - } - } - self.into() - } -} - -trait WatcherExtTail { - fn watch_with_parent(&mut self, path: &Path) -> UResult<()>; -} - -impl WatcherExtTail for dyn Watcher { - /// Wrapper for `notify::Watcher::watch` to also add the parent directory of `path` if necessary. - fn watch_with_parent(&mut self, path: &Path) -> UResult<()> { - let mut path = path.to_owned(); - #[cfg(target_os = "linux")] - if path.is_file() { - /* - NOTE: Using the parent directory instead of the file is a workaround. - This workaround follows the recommendation of the notify crate authors: - > On some platforms, if the `path` is renamed or removed while being watched, behavior may - > be unexpected. See discussions in [#165] and [#166]. If less surprising behavior is wanted - > one may non-recursively watch the _parent_ directory as well and manage related events. - NOTE: Adding both: file and parent results in duplicate/wrong events. - Tested for notify::InotifyWatcher and for notify::PollWatcher. - */ - if let Some(parent) = path.parent() { - if parent.is_dir() { - path = parent.to_owned(); - } else { - path = PathBuf::from("."); - } - } else { - // TODO: [2021-10; jhscheer] add test for this - "cannot watch parent directory" - return Err(USimpleError::new( - 1, - format!("cannot watch parent directory of {}", path.display()), - )); - }; - } - if path.is_relative() { - path = path.canonicalize()?; - } - // TODO: [2022-05; jhscheer] "gnu/tests/tail-2/inotify-rotate-resource.sh" is looking - // for syscalls: 2x "inotify_add_watch" ("filename" and ".") and 1x "inotify_rm_watch" - self.watch(&path, RecursiveMode::NonRecursive).unwrap(); - Ok(()) - } -} - #[cfg(test)] mod tests { diff --git a/src/uu/tail/src/text.rs b/src/uu/tail/src/text.rs new file mode 100644 index 000000000..fba3968dd --- /dev/null +++ b/src/uu/tail/src/text.rs @@ -0,0 +1,20 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore (ToDO) kqueue + +pub static DASH: &str = "-"; +pub static DEV_STDIN: &str = "/dev/stdin"; +pub static STDIN_HEADER: &str = "standard input"; +pub static NO_FILES_REMAINING: &str = "no files remaining"; +pub static NO_SUCH_FILE: &str = "No such file or directory"; +pub static BECOME_INACCESSIBLE: &str = "has become inaccessible"; +pub static BAD_FD: &str = "Bad file descriptor"; +#[cfg(target_os = "linux")] +pub static BACKEND: &str = "inotify"; +#[cfg(all(unix, not(target_os = "linux")))] +pub static BACKEND: &str = "kqueue"; +#[cfg(target_os = "windows")] +pub static BACKEND: &str = "ReadDirectoryChanges"; diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index cbb05ba9d..8e2e177c8 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -7,6 +7,8 @@ // spell-checker:ignore (libs) kqueue // spell-checker:ignore (jargon) tailable untailable +// TODO: add tests for presume_input_pipe + extern crate tail; use crate::common::util::*; @@ -264,6 +266,7 @@ fn test_follow_redirect_stdin_name_retry() { } #[test] +#[cfg(not(target_os = "macos"))] // See test_stdin_redirect_dir_when_target_os_is_macos #[cfg(all(unix, not(any(target_os = "android", target_os = "freebsd"))))] // FIXME: fix this test for Android/FreeBSD fn test_stdin_redirect_dir() { // $ mkdir dir @@ -289,6 +292,39 @@ fn test_stdin_redirect_dir() { .code_is(1); } +// On macOS path.is_dir() can be false for directories if it was a redirect, +// e.g. `$ tail < DIR. The library feature to detect the +// std::io::ErrorKind::IsADirectory isn't stable so we currently show the a wrong +// error message. +// FIXME: If `std::io::ErrorKind::IsADirectory` becomes stable or macos handles +// redirected directories like linux show the correct message like in +// `test_stdin_redirect_dir` +#[test] +#[cfg(target_os = "macos")] +fn test_stdin_redirect_dir_when_target_os_is_macos() { + // $ mkdir dir + // $ tail < dir, $ tail - < dir + // tail: error reading 'standard input': Is a directory + + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + at.mkdir("dir"); + + ts.ucmd() + .set_stdin(std::fs::File::open(at.plus("dir")).unwrap()) + .fails() + .no_stdout() + .stderr_is("tail: cannot open 'standard input' for reading: No such file or directory") + .code_is(1); + ts.ucmd() + .set_stdin(std::fs::File::open(at.plus("dir")).unwrap()) + .arg("-") + .fails() + .no_stdout() + .stderr_is("tail: cannot open 'standard input' for reading: No such file or directory") + .code_is(1); +} + #[test] #[cfg(target_os = "linux")] fn test_follow_stdin_descriptor() {