From 7e1992a0d9004d9bdbb2a73942789831e9554dba Mon Sep 17 00:00:00 2001 From: Ariel Davis Date: Wed, 3 May 2023 19:18:41 -0700 Subject: [PATCH] Make line-index an external lib --- Cargo.lock | 9 + crates/ide-db/Cargo.toml | 3 + crates/ide-db/src/lib.rs | 4 +- crates/ide-db/src/tests/line_index.rs | 54 ++++++ lib/line-index/Cargo.toml | 11 ++ .../line-index/src/lib.rs | 165 +++--------------- lib/line-index/src/tests.rs | 73 ++++++++ 7 files changed, 177 insertions(+), 142 deletions(-) create mode 100644 crates/ide-db/src/tests/line_index.rs create mode 100644 lib/line-index/Cargo.toml rename crates/ide-db/src/line_index.rs => lib/line-index/src/lib.rs (57%) create mode 100644 lib/line-index/src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index d0f0742716..8fc4680e21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -717,6 +717,7 @@ dependencies = [ "indexmap", "itertools", "limit", + "line-index", "memchr", "once_cell", "oorandom", @@ -912,6 +913,14 @@ dependencies = [ name = "limit" version = "0.0.0" +[[package]] +name = "line-index" +version = "0.1.0" +dependencies = [ + "non-hash", + "text-size", +] + [[package]] name = "lock_api" version = "0.4.9" diff --git a/crates/ide-db/Cargo.toml b/crates/ide-db/Cargo.toml index fccd6d2b6d..022eb7859c 100644 --- a/crates/ide-db/Cargo.toml +++ b/crates/ide-db/Cargo.toml @@ -37,6 +37,9 @@ text-edit.workspace = true # something from some `hir-xxx` subpackage, reexport the API via `hir`. 
hir.workspace = true +# used to be a module, turned into its own library +line-index = { version = "0.1.0", path = "../../lib/line-index" } + [dev-dependencies] expect-test = "1.4.0" oorandom = "11.1.3" diff --git a/crates/ide-db/src/lib.rs b/crates/ide-db/src/lib.rs index 5263271fa6..ff1a20f03f 100644 --- a/crates/ide-db/src/lib.rs +++ b/crates/ide-db/src/lib.rs @@ -13,7 +13,6 @@ pub mod famous_defs; pub mod helpers; pub mod items_locator; pub mod label; -pub mod line_index; pub mod path_transform; pub mod rename; pub mod rust_doc; @@ -55,6 +54,8 @@ use triomphe::Arc; use crate::{line_index::LineIndex, symbol_index::SymbolsDatabase}; pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher}; +pub use ::line_index; + /// `base_db` is normally also needed in places where `ide_db` is used, so this re-export is for convenience. pub use base_db; @@ -414,4 +415,5 @@ impl SnippetCap { #[cfg(test)] mod tests { mod sourcegen_lints; + mod line_index; } diff --git a/crates/ide-db/src/tests/line_index.rs b/crates/ide-db/src/tests/line_index.rs new file mode 100644 index 0000000000..c12936071d --- /dev/null +++ b/crates/ide-db/src/tests/line_index.rs @@ -0,0 +1,54 @@ +use line_index::{LineCol, LineIndex, WideEncoding}; +use test_utils::skip_slow_tests; + +#[test] +fn test_every_chars() { + if skip_slow_tests() { + return; + } + + let text: String = { + let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat! + chars.extend("\n".repeat(chars.len() / 16).chars()); + let mut rng = oorandom::Rand32::new(stdx::rand::seed()); + stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize); + chars.into_iter().collect() + }; + assert!(text.contains('💩')); // Sanity check. 
+ + let line_index = LineIndex::new(&text); + + let mut lin_col = LineCol { line: 0, col: 0 }; + let mut col_utf16 = 0; + let mut col_utf32 = 0; + for (offset, c) in text.char_indices() { + let got_offset = line_index.offset(lin_col).unwrap(); + assert_eq!(usize::from(got_offset), offset); + + let got_lin_col = line_index.line_col(got_offset); + assert_eq!(got_lin_col, lin_col); + + for enc in [WideEncoding::Utf16, WideEncoding::Utf32] { + let wide_lin_col = line_index.to_wide(enc, lin_col); + let got_lin_col = line_index.to_utf8(enc, wide_lin_col); + assert_eq!(got_lin_col, lin_col); + + let want_col = match enc { + WideEncoding::Utf16 => col_utf16, + WideEncoding::Utf32 => col_utf32, + }; + assert_eq!(wide_lin_col.col, want_col) + } + + if c == '\n' { + lin_col.line += 1; + lin_col.col = 0; + col_utf16 = 0; + col_utf32 = 0; + } else { + lin_col.col += c.len_utf8() as u32; + col_utf16 += c.len_utf16() as u32; + col_utf32 += 1; + } + } +} diff --git a/lib/line-index/Cargo.toml b/lib/line-index/Cargo.toml new file mode 100644 index 0000000000..0abc539e89 --- /dev/null +++ b/lib/line-index/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "line-index" +version = "0.1.0" +description = "Maps flat `TextSize` offsets into `(line, column)` representation." +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index" +edition = "2021" + +[dependencies] +text-size = "1" +non-hash = { version = "0.1.0", path = "../non-hash" } diff --git a/crates/ide-db/src/line_index.rs b/lib/line-index/src/lib.rs similarity index 57% rename from crates/ide-db/src/line_index.rs rename to lib/line-index/src/lib.rs index 9fb58ebe8a..af01eafc28 100644 --- a/crates/ide-db/src/line_index.rs +++ b/lib/line-index/src/lib.rs @@ -1,10 +1,16 @@ -//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)` -//! representation. +//! See [`LineIndex`]. 
+ +#![deny(clippy::pedantic, missing_debug_implementations, missing_docs, rust_2018_idioms)] + +#[cfg(test)] +mod tests; + use std::{iter, mem}; -use stdx::hash::NoHashHashMap; -use syntax::{TextRange, TextSize}; +use non_hash::NoHashHashMap; +use text_size::{TextRange, TextSize}; +/// Maps flat [`TextSize`] offsets into `(line, column)` representation. #[derive(Clone, Debug, PartialEq, Eq)] pub struct LineIndex { /// Offset the beginning of each line, zero-based. @@ -16,26 +22,29 @@ pub struct LineIndex { /// Line/Column information in native, utf8 format. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct LineCol { - /// Zero-based + /// Zero-based. pub line: u32, - /// Zero-based utf8 offset + /// Zero-based UTF-8 offset. pub col: u32, } +/// A kind of wide character encoding. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub enum WideEncoding { + /// UTF-16. Utf16, + /// UTF-32. Utf32, } /// Line/Column information in legacy encodings. /// -/// Deliberately not a generic type and different from `LineCol`. +/// Deliberately not a generic type and different from [`LineCol`]. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct WideLineCol { - /// Zero-based + /// Zero-based. pub line: u32, - /// Zero-based + /// Zero-based. pub col: u32, } @@ -70,6 +79,7 @@ impl WideChar { } impl LineIndex { + /// Returns a `LineIndex` for the `text`. pub fn new(text: &str) -> LineIndex { let mut line_wide_chars = NoHashHashMap::default(); let mut wide_chars = Vec::new(); @@ -115,6 +125,7 @@ impl LineIndex { LineIndex { newlines, line_wide_chars } } + /// Transforms the `TextSize` into a `LineCol`. pub fn line_col(&self, offset: TextSize) -> LineCol { let line = self.newlines.partition_point(|&it| it <= offset) - 1; let line_start_offset = self.newlines[line]; @@ -122,22 +133,26 @@ impl LineIndex { LineCol { line: line as u32, col: col.into() } } + /// Transforms the `LineCol` into a `TextSize`. 
pub fn offset(&self, line_col: LineCol) -> Option<TextSize> { self.newlines .get(line_col.line as usize) .map(|offset| offset + TextSize::from(line_col.col)) } + /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol { let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into()); WideLineCol { line: line_col.line, col: col as u32 } } + /// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`. pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol { let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col); LineCol { line: line_col.line, col: col.into() } } + /// Returns an iterator over the ranges for the lines. pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ { let lo = self.newlines.partition_point(|&it| it < range.start()); let hi = self.newlines.partition_point(|&it| it <= range.end()); @@ -183,135 +198,3 @@ impl LineIndex { col.into() } } - -#[cfg(test)] -mod tests { - use test_utils::skip_slow_tests; - - use super::WideEncoding::{Utf16, Utf32}; - use super::*; - - #[test] - fn test_line_index() { - let text = "hello\nworld"; - let table = [ - (00, 0, 0), - (01, 0, 1), - (05, 0, 5), - (06, 1, 0), - (07, 1, 1), - (08, 1, 2), - (10, 1, 4), - (11, 1, 5), - (12, 1, 6), - ]; - - let index = LineIndex::new(text); - for (offset, line, col) in table { - assert_eq!(index.line_col(offset.into()), LineCol { line, col }); - } - - let text = "\nhello\nworld"; - let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]; - let index = LineIndex::new(text); - for (offset, line, col) in table { - assert_eq!(index.line_col(offset.into()), LineCol { line, col }); - } - } - - #[test] - fn test_char_len() { - assert_eq!('メ'.len_utf8(), 3); - assert_eq!('メ'.len_utf16(), 1); - } - - #[test] - fn test_empty_index() { - let col_index = LineIndex::new( - " -const C: char = 'x'; -", - ); - 
assert_eq!(col_index.line_wide_chars.len(), 0); - } - - #[test] - fn test_every_chars() { - if skip_slow_tests() { - return; - } - - let text: String = { - let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat! - chars.extend("\n".repeat(chars.len() / 16).chars()); - let mut rng = oorandom::Rand32::new(stdx::rand::seed()); - stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize); - chars.into_iter().collect() - }; - assert!(text.contains('💩')); // Sanity check. - - let line_index = LineIndex::new(&text); - - let mut lin_col = LineCol { line: 0, col: 0 }; - let mut col_utf16 = 0; - let mut col_utf32 = 0; - for (offset, c) in text.char_indices() { - let got_offset = line_index.offset(lin_col).unwrap(); - assert_eq!(usize::from(got_offset), offset); - - let got_lin_col = line_index.line_col(got_offset); - assert_eq!(got_lin_col, lin_col); - - for enc in [Utf16, Utf32] { - let wide_lin_col = line_index.to_wide(enc, lin_col); - let got_lin_col = line_index.to_utf8(enc, wide_lin_col); - assert_eq!(got_lin_col, lin_col); - - let want_col = match enc { - Utf16 => col_utf16, - Utf32 => col_utf32, - }; - assert_eq!(wide_lin_col.col, want_col) - } - - if c == '\n' { - lin_col.line += 1; - lin_col.col = 0; - col_utf16 = 0; - col_utf32 = 0; - } else { - lin_col.col += c.len_utf8() as u32; - col_utf16 += c.len_utf16() as u32; - col_utf32 += 1; - } - } - } - - #[test] - fn test_splitlines() { - fn r(lo: u32, hi: u32) -> TextRange { - TextRange::new(lo.into(), hi.into()) - } - - let text = "a\nbb\nccc\n"; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>(); - let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; - assert_eq!(actual, expected); - - let text = ""; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>(); - let expected = vec![]; - assert_eq!(actual, expected); - - let text = "\n"; - let line_index = LineIndex::new(text); - - let actual = line_index.lines(r(0, 
1)).collect::<Vec<_>>(); - let expected = vec![r(0, 1)]; - assert_eq!(actual, expected) - } -} diff --git a/lib/line-index/src/tests.rs b/lib/line-index/src/tests.rs new file mode 100644 index 0000000000..4b58cfc47d --- /dev/null +++ b/lib/line-index/src/tests.rs @@ -0,0 +1,73 @@ +use super::*; + +#[test] +fn test_line_index() { + let text = "hello\nworld"; + let table = [ + (00, 0, 0), + (01, 0, 1), + (05, 0, 5), + (06, 1, 0), + (07, 1, 1), + (08, 1, 2), + (10, 1, 4), + (11, 1, 5), + (12, 1, 6), + ]; + + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } + + let text = "\nhello\nworld"; + let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]; + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } +} + +#[test] +fn test_char_len() { + assert_eq!('メ'.len_utf8(), 3); + assert_eq!('メ'.len_utf16(), 1); +} + +#[test] +fn test_empty_index() { + let col_index = LineIndex::new( + " +const C: char = 'x'; +", + ); + assert_eq!(col_index.line_wide_chars.len(), 0); +} + +#[test] +fn test_splitlines() { + fn r(lo: u32, hi: u32) -> TextRange { + TextRange::new(lo.into(), hi.into()) + } + + let text = "a\nbb\nccc\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>(); + let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; + assert_eq!(actual, expected); + + let text = ""; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>(); + let expected = vec![]; + assert_eq!(actual, expected); + + let text = "\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>(); + let expected = vec![r(0, 1)]; + assert_eq!(actual, expected) +}