Make line-index an external lib

2024-12-27 05:23:24 +00:00 · 2023-05-03 19:18:41 -07:00 · 2023-05-03 19:18:41 -07:00 · 7e1992a0d9
commit 7e1992a0d9
parent 29256f22e4
7 changed files with 177 additions and 142 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -717,6 +717,7 @@ dependencies = [
 "indexmap",
 "itertools",
 "limit",
+ "line-index",
 "memchr",
 "once_cell",
 "oorandom",
@ -912,6 +913,14 @@ dependencies = [
 name = "limit"
 version = "0.0.0"

+[[package]]
+name = "line-index"
+version = "0.1.0"
+dependencies = [
+ "non-hash",
+ "text-size",
+]
+
 [[package]]
 name = "lock_api"
 version = "0.4.9"
--- a/crates/ide-db/Cargo.toml
+++ b/crates/ide-db/Cargo.toml
@ -37,6 +37,9 @@ text-edit.workspace = true
 # something from some `hir-xxx` subpackage, reexport the API via `hir`.
 hir.workspace = true

+# used to be a module, turned into its own library
+line-index = { version = "0.1.0", path = "../../lib/line-index" }
+
 [dev-dependencies]
 expect-test = "1.4.0"
 oorandom = "11.1.3"
--- a/crates/ide-db/src/lib.rs
+++ b/crates/ide-db/src/lib.rs
@ -13,7 +13,6 @@ pub mod famous_defs;
 pub mod helpers;
 pub mod items_locator;
 pub mod label;
-pub mod line_index;
 pub mod path_transform;
 pub mod rename;
 pub mod rust_doc;
@ -55,6 +54,8 @@ use triomphe::Arc;
 use crate::{line_index::LineIndex, symbol_index::SymbolsDatabase};
 pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher};

+pub use ::line_index;
+
 /// `base_db` is normally also needed in places where `ide_db` is used, so this re-export is for convenience.
 pub use base_db;

@ -414,4 +415,5 @@ impl SnippetCap {
 #[cfg(test)]
 mod tests {
    mod sourcegen_lints;
+    mod line_index;
 }
--- a/crates/ide-db/src/tests/line_index.rs
+++ b/crates/ide-db/src/tests/line_index.rs
@ -0,0 +1,54 @@
+use line_index::{LineCol, LineIndex, WideEncoding};
+use test_utils::skip_slow_tests;
+
+#[test]
+fn test_every_chars() {
+    if skip_slow_tests() {
+        return;
+    }
+
+    let text: String = {
+        let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
+        chars.extend("\n".repeat(chars.len() / 16).chars());
+        let mut rng = oorandom::Rand32::new(stdx::rand::seed());
+        stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
+        chars.into_iter().collect()
+    };
+    assert!(text.contains('💩')); // Sanity check.
+
+    let line_index = LineIndex::new(&text);
+
+    let mut lin_col = LineCol { line: 0, col: 0 };
+    let mut col_utf16 = 0;
+    let mut col_utf32 = 0;
+    for (offset, c) in text.char_indices() {
+        let got_offset = line_index.offset(lin_col).unwrap();
+        assert_eq!(usize::from(got_offset), offset);
+
+        let got_lin_col = line_index.line_col(got_offset);
+        assert_eq!(got_lin_col, lin_col);
+
+        for enc in [WideEncoding::Utf16, WideEncoding::Utf32] {
+            let wide_lin_col = line_index.to_wide(enc, lin_col);
+            let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
+            assert_eq!(got_lin_col, lin_col);
+
+            let want_col = match enc {
+                WideEncoding::Utf16 => col_utf16,
+                WideEncoding::Utf32 => col_utf32,
+            };
+            assert_eq!(wide_lin_col.col, want_col)
+        }
+
+        if c == '\n' {
+            lin_col.line += 1;
+            lin_col.col = 0;
+            col_utf16 = 0;
+            col_utf32 = 0;
+        } else {
+            lin_col.col += c.len_utf8() as u32;
+            col_utf16 += c.len_utf16() as u32;
+            col_utf32 += 1;
+        }
+    }
+}
--- a/lib/line-index/Cargo.toml
+++ b/lib/line-index/Cargo.toml
@ -0,0 +1,11 @@
+[package]
+name = "line-index"
+version = "0.1.0"
+description = "Maps flat `TextSize` offsets into `(line, column)` representation."
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index"
+edition = "2021"
+
+[dependencies]
+text-size = "1"
+non-hash = { version = "0.1.0", path = "../non-hash" }
--- a/crates/ide-db/src/line_index.rs
+++ b/crates/ide-db/src/line_index.rs
@ -1,10 +1,16 @@
-//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
-//! representation.
+//! See [`LineIndex`].
+
+#![deny(clippy::pedantic, missing_debug_implementations, missing_docs, rust_2018_idioms)]
+
+#[cfg(test)]
+mod tests;
+
 use std::{iter, mem};

-use stdx::hash::NoHashHashMap;
-use syntax::{TextRange, TextSize};
+use non_hash::NoHashHashMap;
+use text_size::{TextRange, TextSize};

+/// Maps flat [`TextSize`] offsets into `(line, column)` representation.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct LineIndex {
    /// Offset the beginning of each line, zero-based.
@ -16,26 +22,29 @@ pub struct LineIndex {
 /// Line/Column information in native, utf8 format.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct LineCol {
-    /// Zero-based
+    /// Zero-based.
    pub line: u32,
-    /// Zero-based utf8 offset
+    /// Zero-based UTF-8 offset.
    pub col: u32,
 }

+/// A kind of wide character encoding.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub enum WideEncoding {
+    /// UTF-16.
    Utf16,
+    /// UTF-32.
    Utf32,
 }

 /// Line/Column information in legacy encodings.
 ///
-/// Deliberately not a generic type and different from `LineCol`.
+/// Deliberately not a generic type and different from [`LineCol`].
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct WideLineCol {
-    /// Zero-based
+    /// Zero-based.
    pub line: u32,
-    /// Zero-based
+    /// Zero-based.
    pub col: u32,
 }

@ -70,6 +79,7 @@ impl WideChar {
 }

 impl LineIndex {
+    /// Returns a `LineIndex` for the `text`.
    pub fn new(text: &str) -> LineIndex {
        let mut line_wide_chars = NoHashHashMap::default();
        let mut wide_chars = Vec::new();
@ -115,6 +125,7 @@ impl LineIndex {
        LineIndex { newlines, line_wide_chars }
    }

+    /// Transforms the `TextSize` into a `LineCol`.
    pub fn line_col(&self, offset: TextSize) -> LineCol {
        let line = self.newlines.partition_point(|&it| it <= offset) - 1;
        let line_start_offset = self.newlines[line];
@ -122,22 +133,26 @@ impl LineIndex {
        LineCol { line: line as u32, col: col.into() }
    }

+    /// Transforms the `LineCol` into a `TextSize`; `None` if the line is out of range.
    pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
        self.newlines
            .get(line_col.line as usize)
            .map(|offset| offset + TextSize::from(line_col.col))
    }

+    /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`.
    pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {
        let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());
        WideLineCol { line: line_col.line, col: col as u32 }
    }

+    /// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`.
    pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {
        let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);
        LineCol { line: line_col.line, col: col.into() }
    }

+    /// Returns an iterator over the ranges for the lines.
    pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
        let lo = self.newlines.partition_point(|&it| it < range.start());
        let hi = self.newlines.partition_point(|&it| it <= range.end());
@ -183,135 +198,3 @@ impl LineIndex {
        col.into()
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use test_utils::skip_slow_tests;
-
-    use super::WideEncoding::{Utf16, Utf32};
-    use super::*;
-
-    #[test]
-    fn test_line_index() {
-        let text = "hello\nworld";
-        let table = [
-            (00, 0, 0),
-            (01, 0, 1),
-            (05, 0, 5),
-            (06, 1, 0),
-            (07, 1, 1),
-            (08, 1, 2),
-            (10, 1, 4),
-            (11, 1, 5),
-            (12, 1, 6),
-        ];
-
-        let index = LineIndex::new(text);
-        for (offset, line, col) in table {
-            assert_eq!(index.line_col(offset.into()), LineCol { line, col });
-        }
-
-        let text = "\nhello\nworld";
-        let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];
-        let index = LineIndex::new(text);
-        for (offset, line, col) in table {
-            assert_eq!(index.line_col(offset.into()), LineCol { line, col });
-        }
-    }
-
-    #[test]
-    fn test_char_len() {
-        assert_eq!('メ'.len_utf8(), 3);
-        assert_eq!('メ'.len_utf16(), 1);
-    }
-
-    #[test]
-    fn test_empty_index() {
-        let col_index = LineIndex::new(
-            "
-const C: char = 'x';
-",
-        );
-        assert_eq!(col_index.line_wide_chars.len(), 0);
-    }
-
-    #[test]
-    fn test_every_chars() {
-        if skip_slow_tests() {
-            return;
-        }
-
-        let text: String = {
-            let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
-            chars.extend("\n".repeat(chars.len() / 16).chars());
-            let mut rng = oorandom::Rand32::new(stdx::rand::seed());
-            stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
-            chars.into_iter().collect()
-        };
-        assert!(text.contains('💩')); // Sanity check.
-
-        let line_index = LineIndex::new(&text);
-
-        let mut lin_col = LineCol { line: 0, col: 0 };
-        let mut col_utf16 = 0;
-        let mut col_utf32 = 0;
-        for (offset, c) in text.char_indices() {
-            let got_offset = line_index.offset(lin_col).unwrap();
-            assert_eq!(usize::from(got_offset), offset);
-
-            let got_lin_col = line_index.line_col(got_offset);
-            assert_eq!(got_lin_col, lin_col);
-
-            for enc in [Utf16, Utf32] {
-                let wide_lin_col = line_index.to_wide(enc, lin_col);
-                let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
-                assert_eq!(got_lin_col, lin_col);
-
-                let want_col = match enc {
-                    Utf16 => col_utf16,
-                    Utf32 => col_utf32,
-                };
-                assert_eq!(wide_lin_col.col, want_col)
-            }
-
-            if c == '\n' {
-                lin_col.line += 1;
-                lin_col.col = 0;
-                col_utf16 = 0;
-                col_utf32 = 0;
-            } else {
-                lin_col.col += c.len_utf8() as u32;
-                col_utf16 += c.len_utf16() as u32;
-                col_utf32 += 1;
-            }
-        }
-    }
-
-    #[test]
-    fn test_splitlines() {
-        fn r(lo: u32, hi: u32) -> TextRange {
-            TextRange::new(lo.into(), hi.into())
-        }
-
-        let text = "a\nbb\nccc\n";
-        let line_index = LineIndex::new(text);
-
-        let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();
-        let expected = vec![r(0, 2), r(2, 5), r(5, 9)];
-        assert_eq!(actual, expected);
-
-        let text = "";
-        let line_index = LineIndex::new(text);
-
-        let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();
-        let expected = vec![];
-        assert_eq!(actual, expected);
-
-        let text = "\n";
-        let line_index = LineIndex::new(text);
-
-        let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();
-        let expected = vec![r(0, 1)];
-        assert_eq!(actual, expected)
-    }
-}
--- a/lib/line-index/src/tests.rs
+++ b/lib/line-index/src/tests.rs
@ -0,0 +1,73 @@
+use super::*;
+
+#[test]
+fn test_line_index() {
+    let text = "hello\nworld";
+    let table = [
+        (00, 0, 0),
+        (01, 0, 1),
+        (05, 0, 5),
+        (06, 1, 0),
+        (07, 1, 1),
+        (08, 1, 2),
+        (10, 1, 4),
+        (11, 1, 5),
+        (12, 1, 6),
+    ];
+
+    let index = LineIndex::new(text);
+    for (offset, line, col) in table {
+        assert_eq!(index.line_col(offset.into()), LineCol { line, col });
+    }
+
+    let text = "\nhello\nworld";
+    let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];
+    let index = LineIndex::new(text);
+    for (offset, line, col) in table {
+        assert_eq!(index.line_col(offset.into()), LineCol { line, col });
+    }
+}
+
+#[test]
+fn test_char_len() {
+    assert_eq!('メ'.len_utf8(), 3);
+    assert_eq!('メ'.len_utf16(), 1);
+}
+
+#[test]
+fn test_empty_index() {
+    let col_index = LineIndex::new(
+        "
+const C: char = 'x';
+",
+    );
+    assert_eq!(col_index.line_wide_chars.len(), 0);
+}
+
+#[test]
+fn test_splitlines() {
+    fn r(lo: u32, hi: u32) -> TextRange {
+        TextRange::new(lo.into(), hi.into())
+    }
+
+    let text = "a\nbb\nccc\n";
+    let line_index = LineIndex::new(text);
+
+    let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();
+    let expected = vec![r(0, 2), r(2, 5), r(5, 9)];
+    assert_eq!(actual, expected);
+
+    let text = "";
+    let line_index = LineIndex::new(text);
+
+    let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();
+    let expected = vec![];
+    assert_eq!(actual, expected);
+
+    let text = "\n";
+    let line_index = LineIndex::new(text);
+
+    let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();
+    let expected = vec![r(0, 1)];
+    assert_eq!(actual, expected)
+}