rust-analyzer/crates/ide-db/src/line_index.rs

//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
//! representation.
use std::{iter, mem};

use stdx::hash::NoHashHashMap;
use syntax::{TextRange, TextSize};

/// Precomputed table for translating between flat `TextSize` offsets and
/// line/column positions of a text.
///
/// Built in one pass over the text by `LineIndex::new`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LineIndex {
    /// Offset of the beginning of each line, zero-based.
    ///
    /// `LineIndex::new` stores offset `0` first and then, for every `\n` in the
    /// text, the offset just past it.
    pub(crate) newlines: Vec<TextSize>,
    /// Non-ASCII characters of each line, keyed by zero-based line number and
    /// kept in the order they occur. Lines without such characters have no entry.
    pub(crate) line_wide_chars: NoHashHashMap<u32, Vec<WideChar>>,
}

/// Line/Column information in native, utf8 format.
///
/// This is the position type produced by `LineIndex::line_col` and consumed by
/// `LineIndex::offset`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Zero-based line number.
    pub line: u32,
    /// Zero-based column, measured in utf8 code units (bytes) from the start
    /// of the line.
    pub col: u32,
}

/// Encodings with code units wider than a byte in which a `WideLineCol`
/// column can be expressed.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum WideEncoding {
    /// Columns count UTF-16 code units.
    Utf16,
    /// Columns count UTF-32 code units, i.e. one per character.
    Utf32,
}

/// Line/Column information in legacy encodings.
///
/// Deliberately not a generic type and different from `LineCol`.
///
/// The encoding the column is measured in is not stored here; it is passed
/// separately as a `WideEncoding` to `LineIndex::to_wide` and
/// `LineIndex::to_utf8`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct WideLineCol {
    /// Zero-based line number.
    pub line: u32,
    /// Zero-based column, measured in code units of the chosen `WideEncoding`.
    pub col: u32,
}

/// Extent of a single non-ASCII character within its line, in utf8 bytes.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub(crate) struct WideChar {
    /// Start offset of a character inside a line, zero-based
    pub(crate) start: TextSize,
    /// End offset of a character inside a line, zero-based (exclusive: this is
    /// `start` plus the character's utf8 length)
    pub(crate) end: TextSize,
}

impl WideChar {
    /// Returns the length in 8-bit UTF-8 code units.
    fn len(&self) -> TextSize {
        self.end - self.start
    }

    /// Returns the length in UTF-16 or UTF-32 code units.
    fn wide_len(&self, enc: WideEncoding) -> usize {
        match enc {
            WideEncoding::Utf16 => {
                if self.len() == TextSize::from(4) {
                    2
                } else {
                    1
                }
            }

            WideEncoding::Utf32 => 1,
        }
    }
}

impl LineIndex {
    /// Builds the index for `text` in a single pass over its characters.
    ///
    /// Records the offset at which every line starts and, per line, the byte
    /// extents of its non-ASCII characters.
    pub fn new(text: &str) -> LineIndex {
        let mut newlines = Vec::with_capacity(16);
        // The first line always starts at offset zero.
        newlines.push(TextSize::from(0));

        let mut line_wide_chars = NoHashHashMap::default();
        // Non-ASCII characters collected for the line currently being scanned.
        let mut pending = Vec::new();

        let mut line_no = 0;
        // Offset just past the current character, relative to the whole text.
        let mut offset = TextSize::from(0);
        // Offset of the current character, relative to the start of its line.
        let mut col = TextSize::from(0);

        for ch in text.chars() {
            let width = TextSize::of(ch);
            offset += width;

            match ch {
                '\n' => {
                    // The next line begins right after the newline.
                    newlines.push(offset);

                    // Flush what was gathered for the line that just ended.
                    if !pending.is_empty() {
                        line_wide_chars.insert(line_no, mem::take(&mut pending));
                    }

                    line_no += 1;
                    col = TextSize::from(0);
                }
                _ => {
                    let end = col + width;
                    if !ch.is_ascii() {
                        pending.push(WideChar { start: col, end });
                    }
                    col = end;
                }
            }
        }

        // The last line has no terminating newline to trigger the flush above.
        if !pending.is_empty() {
            line_wide_chars.insert(line_no, pending);
        }

        newlines.shrink_to_fit();
        line_wide_chars.shrink_to_fit();

        LineIndex { newlines, line_wide_chars }
    }

    /// Converts a flat offset into a utf8 `LineCol`.
    ///
    /// The line is the last one that starts at or before `offset`; the column
    /// is the distance in bytes from that line's start.
    pub fn line_col(&self, offset: TextSize) -> LineCol {
        // `LineIndex::new` stores offset zero as the first entry, so at least
        // one entry satisfies the predicate and the `- 1` does not underflow.
        let line = self.newlines.partition_point(|&start| start <= offset) - 1;
        let col = u32::from(offset - self.newlines[line]);
        LineCol { line: line as u32, col }
    }

    /// Converts a utf8 `LineCol` back into a flat offset.
    ///
    /// Returns `None` when `line_col.line` is not a line of this index, or when
    /// adding the column to the line's start offset would overflow `TextSize`.
    /// The column is not checked against the actual length of the line.
    pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
        let line_start = *self.newlines.get(line_col.line as usize)?;
        // Checked addition: an absurdly large column yields `None` rather than
        // overflowing the offset arithmetic.
        line_start.checked_add(TextSize::from(line_col.col))
    }

    pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {
        let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());
        WideLineCol { line: line_col.line, col: col as u32 }
    }

    pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {
        let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);
        LineCol { line: line_col.line, col: col.into() }
    }

    /// Splits `range` at every line start that falls inside it.
    ///
    /// Yields the resulting sub-ranges in order, skipping empty ones, so each
    /// yielded range lies within a single line.
    pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
        let (start, end) = (range.start(), range.end());

        // Line starts lying in `start..=end`; these are the cut points.
        let first = self.newlines.partition_point(|&nl| nl < start);
        let last = self.newlines.partition_point(|&nl| nl <= end);
        let cuts = &self.newlines[first..last];

        // Pair each piece's beginning with its end: `start, c1, .., cn` against
        // `c1, .., cn, end`.
        let begins = iter::once(start).chain(cuts.iter().copied());
        let ends = cuts.iter().copied().chain(iter::once(end));

        begins
            .zip(ends)
            .map(|(from, to)| TextRange::new(from, to))
            .filter(|piece| !piece.is_empty())
    }

    /// Converts a utf8 column on `line` into a column counted in `enc` code units.
    ///
    /// Every non-ASCII character that ends at or before `col` shrinks the
    /// column by the difference between its utf8 length and its wide length.
    fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize {
        let shrink: usize = self
            .line_wide_chars
            .get(&line)
            .into_iter()
            .flatten()
            // Wide chars are stored in order of occurrence, so once one ends
            // past `col` all remaining ones do too and can be ignored.
            .take_while(|wide| wide.end <= col)
            .map(|wide| usize::from(wide.len()) - wide.wide_len(enc))
            .sum();

        usize::from(col) - shrink
    }

    fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize {
        if let Some(wide_chars) = self.line_wide_chars.get(&line) {
            for c in wide_chars {
                if col > u32::from(c.start) {
                    col += u32::from(c.len()) - c.wide_len(enc) as u32;
                } else {
                    // From here on, all utf16 characters come *after* the character we are mapping,
                    // so we don't need to take them into account
                    break;
                }
            }
        }

        col.into()
    }
}

#[cfg(test)]
mod tests {
    use test_utils::skip_slow_tests;

    use super::WideEncoding::{Utf16, Utf32};
    use super::*;

    #[test]
    fn test_line_index() {
        /// Asserts that each `(offset, line, col)` row holds for `text`.
        fn check(text: &str, table: &[(u32, u32, u32)]) {
            let index = LineIndex::new(text);
            for &(offset, line, col) in table {
                assert_eq!(index.line_col(offset.into()), LineCol { line, col });
            }
        }

        // Offsets 11 and 12 lie at and past the end of the text and still map
        // onto the last line.
        check(
            "hello\nworld",
            &[
                (0, 0, 0),
                (1, 0, 1),
                (5, 0, 5),
                (6, 1, 0),
                (7, 1, 1),
                (8, 1, 2),
                (10, 1, 4),
                (11, 1, 5),
                (12, 1, 6),
            ],
        );

        // A leading newline makes the first line empty.
        check("\nhello\nworld", &[(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]);
    }

    #[test]
    fn test_char_len() {
        // A character that is three bytes in UTF-8 but a single UTF-16 unit.
        let ch = 'メ';
        assert_eq!(ch.len_utf8(), 3);
        assert_eq!(ch.len_utf16(), 1);
    }

    #[test]
    fn test_empty_index() {
        // Pure-ASCII input must not record any wide characters.
        let index = LineIndex::new("\nconst C: char = 'x';\n");
        assert_eq!(index.line_wide_chars.len(), 0);
    }

    /// Round-trips every position of a shuffled text containing (almost) every
    /// `char` through all conversions offered by `LineIndex`.
    #[test]
    fn test_every_chars() {
        if skip_slow_tests() {
            return;
        }

        // Every char from 0 up to (but excluding) `char::MAX`, plus one newline
        // per 16 chars, shuffled with a seeded RNG.
        let text: String = {
            let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
            chars.extend("\n".repeat(chars.len() / 16).chars());
            let mut rng = oorandom::Rand32::new(stdx::rand::seed());
            stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
            chars.into_iter().collect()
        };
        assert!(text.contains('💩')); // Sanity check.

        let line_index = LineIndex::new(&text);

        // Expected position of the current char, tracked independently of the
        // index in each of the three encodings.
        let mut lin_col = LineCol { line: 0, col: 0 };
        let mut col_utf16 = 0;
        let mut col_utf32 = 0;
        for (offset, c) in text.char_indices() {
            // utf8 line/col -> offset must agree with the real byte offset.
            let got_offset = line_index.offset(lin_col).unwrap();
            assert_eq!(usize::from(got_offset), offset);

            // offset -> utf8 line/col must give the tracked position back.
            let got_lin_col = line_index.line_col(got_offset);
            assert_eq!(got_lin_col, lin_col);

            for enc in [Utf16, Utf32] {
                // utf8 -> wide -> utf8 must be lossless.
                let wide_lin_col = line_index.to_wide(enc, lin_col);
                let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
                assert_eq!(got_lin_col, lin_col);

                // The wide column must match the independently tracked one.
                let want_col = match enc {
                    Utf16 => col_utf16,
                    Utf32 => col_utf32,
                };
                assert_eq!(wide_lin_col.col, want_col)
            }

            // Advance the expected position past `c`.
            if c == '\n' {
                lin_col.line += 1;
                lin_col.col = 0;
                col_utf16 = 0;
                col_utf32 = 0;
            } else {
                lin_col.col += c.len_utf8() as u32;
                col_utf16 += c.len_utf16() as u32;
                col_utf32 += 1;
            }
        }
    }

    #[test]
    fn test_splitlines() {
        /// Shorthand for building a `TextRange` from raw offsets.
        fn r(lo: u32, hi: u32) -> TextRange {
            TextRange::new(lo.into(), hi.into())
        }

        /// Asserts that splitting `range` of `text` yields exactly `expected`.
        fn check(text: &str, range: TextRange, expected: &[TextRange]) {
            let actual: Vec<TextRange> = LineIndex::new(text).lines(range).collect();
            assert_eq!(actual, expected);
        }

        // Each piece ends right after its newline.
        check("a\nbb\nccc\n", r(0, 9), &[r(0, 2), r(2, 5), r(5, 9)]);
        // An empty range over empty text produces nothing.
        check("", r(0, 0), &[]);
        // A lone newline is a single piece; no empty trailing piece is produced.
        check("\n", r(0, 1), &[r(0, 1)]);
    }
}
Convert code to text-size 2020-04-24 21:40:41 +00:00			//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
Cleanup 2020-02-06 13:43:46 +00:00			`//! representation.`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00			`use std::{iter, mem};`
Switch to TryFrom 2020-04-24 22:57:47 +00:00
Make use of NoHash hashing for FileId and CrateId 2022-08-25 18:31:02 +00:00			`use stdx::hash::NoHashHashMap;`
Rename ra_syntax -> syntax 2020-08-12 16:26:51 +00:00			`use syntax::{TextRange, TextSize};`
Add line index 2018-08-10 18:13:39 +00:00
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`#[derive(Clone, Debug, PartialEq, Eq)]`
Add line index 2018-08-10 18:13:39 +00:00			`pub struct LineIndex {`
Update crates/ide-db/src/line_index.rs Co-authored-by: Stig Brautaset <stig@brautaset.org> 2023-02-14 08:19:58 +00:00			`/// Offset the beginning of each line, zero-based.`
Convert code to text-size 2020-04-24 21:40:41 +00:00			`pub(crate) newlines: Vec<TextSize>,`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`/// List of non-ASCII characters on each line.`
			`pub(crate) line_wide_chars: NoHashHashMap<u32, Vec<WideChar>>,`
Add line index 2018-08-10 18:13:39 +00:00			`}`

Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`/// Line/Column information in native, utf8 format.`
Add line index 2018-08-10 18:13:39 +00:00			`#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`pub struct LineCol {`
add analysis-bench to benchmark incremental analysis Can be used like this: ``` $ cargo run --release -p ra_cli -- \ analysis-bench ../chalk/ \ --complete ../chalk/chalk-engine/src/logic.rs:94:0 loading: 225.970093ms from scratch: 8.492373325s no change: 445.265µs trivial change: 95.631242ms ``` Or like this: ``` $ cargo run --release -p ra_cli -- \ analysis-bench ../chalk/ \ --highlight ../chalk/chalk-engine/src/logic.rs loading: 209.873484ms from scratch: 9.504916942s no change: 7.731119ms trivial change: 124.984039ms ``` "from scratch" includes initial analysis of the relevant bits of the project "no change" just asks the same question for the second time. It measures overhead on assembling the answer outside of salsa. "trivial change" doesn't do an actual salsa change, it just advances the revision. This test how fast is salsa at validating things. 2019-06-16 16:19:38 +00:00			`/// Zero-based`
Add line index 2018-08-10 18:13:39 +00:00			`pub line: u32,`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`/// Zero-based utf8 offset`
Prepare for utf-8 offsets 2021-02-12 18:24:10 +00:00			`pub col: u32,`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`}`

Make utf8 default, implement utf16 in terms of it 2021-02-12 19:09:53 +00:00			`#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`pub enum WideEncoding {`
			`Utf16,`
			`Utf32,`
			`}`

			`/// Line/Column information in legacy encodings.`
			`///`
			/// Deliberately not a generic type and different from `LineCol`.
			`#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]`
			`pub struct WideLineCol {`
Make utf8 default, implement utf16 in terms of it 2021-02-12 19:09:53 +00:00			`/// Zero-based`
			`pub line: u32,`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`/// Zero-based`
Make utf8 default, implement utf16 in terms of it 2021-02-12 19:09:53 +00:00			`pub col: u32,`
			`}`

Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`#[derive(Clone, Debug, Hash, PartialEq, Eq)]`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`pub(crate) struct WideChar {`
Don't count start of non-ASCII characters as being inside of them 2020-05-03 06:54:15 +00:00			`/// Start offset of a character inside a line, zero-based`
Convert code to text-size 2020-04-24 21:40:41 +00:00			`pub(crate) start: TextSize,`
Don't count start of non-ASCII characters as being inside of them 2020-05-03 06:54:15 +00:00			`/// End offset of a character inside a line, zero-based`
Convert code to text-size 2020-04-24 21:40:41 +00:00			`pub(crate) end: TextSize,`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`}`

Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`impl WideChar {`
Fix column conversion for supplementary plane characters 2020-05-05 17:29:04 +00:00			`/// Returns the length in 8-bit UTF-8 code units.`
Convert code to text-size 2020-04-24 21:40:41 +00:00			`fn len(&self) -> TextSize {`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`self.end - self.start`
			`}`
Fix column conversion for supplementary plane characters 2020-05-05 17:29:04 +00:00
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`/// Returns the length in UTF-16 or UTF-32 code units.`
			`fn wide_len(&self, enc: WideEncoding) -> usize {`
			`match enc {`
			`WideEncoding::Utf16 => {`
			`if self.len() == TextSize::from(4) {`
			`2`
			`} else {`
			`1`
			`}`
			`}`

			`WideEncoding::Utf32 => 1,`
Fix column conversion for supplementary plane characters 2020-05-05 17:29:04 +00:00			`}`
			`}`
Add line index 2018-08-10 18:13:39 +00:00			`}`

			`impl LineIndex {`
			`pub fn new(text: &str) -> LineIndex {`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`let mut line_wide_chars = NoHashHashMap::default();`
			`let mut wide_chars = Vec::new();`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00
internal: Optimize `apply_document_changes` a bit 2022-11-04 23:27:03 +00:00			`let mut newlines = Vec::with_capacity(16);`
			`newlines.push(TextSize::from(0));`

			`let mut curr_row = 0.into();`
			`let mut curr_col = 0.into();`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`let mut line = 0;`
Add line index 2018-08-10 18:13:39 +00:00			`for c in text.chars() {`
Cleanups 2020-04-24 22:17:50 +00:00			`let c_len = TextSize::of(c);`
			`curr_row += c_len;`
Add line index 2018-08-10 18:13:39 +00:00			`if c == '\n' {`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`newlines.push(curr_row);`

			`// Save any utf-16 characters seen in the previous line`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`if !wide_chars.is_empty() {`
			`line_wide_chars.insert(line, mem::take(&mut wide_chars));`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`}`

			`// Prepare for processing the next line`
			`curr_col = 0.into();`
			`line += 1;`
			`continue;`
Add line index 2018-08-10 18:13:39 +00:00			`}`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00
Cleanups 2020-04-24 22:17:50 +00:00			`if !c.is_ascii() {`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len });`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`}`

Cleanups 2020-04-24 22:17:50 +00:00			`curr_col += c_len;`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`}`
fix arbitrary offset generation, col translation working 2018-12-22 19:52:43 +00:00
			`// Save any utf-16 characters seen in the last line`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`if !wide_chars.is_empty() {`
			`line_wide_chars.insert(line, wide_chars);`
fix arbitrary offset generation, col translation working 2018-12-22 19:52:43 +00:00			`}`

Move Expander and LowerCtx into separate modules 2023-04-17 15:31:39 +00:00			`newlines.shrink_to_fit();`
			`line_wide_chars.shrink_to_fit();`

Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`LineIndex { newlines, line_wide_chars }`
Add line index 2018-08-10 18:13:39 +00:00			`}`

Make utf8 default, implement utf16 in terms of it 2021-02-12 19:09:53 +00:00			`pub fn line_col(&self, offset: TextSize) -> LineCol {`
internal: use API stabilized in 1.52 2021-05-04 11:10:49 +00:00			`let line = self.newlines.partition_point(\|&it\| it <= offset) - 1;`
Add line index 2018-08-10 18:13:39 +00:00			`let line_start_offset = self.newlines[line];`
			`let col = offset - line_start_offset;`
Make utf8 default, implement utf16 in terms of it 2021-02-12 19:09:53 +00:00			`LineCol { line: line as u32, col: col.into() }`
			`}`

fix: don't panic on seeing an unexpected offset 2022-01-03 14:49:47 +00:00			`pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {`
			`self.newlines`
			`.get(line_col.line as usize)`
			`.map(\|offset\| offset + TextSize::from(line_col.col))`
Make utf8 default, implement utf16 in terms of it 2021-02-12 19:09:53 +00:00			`}`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {`
			`let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());`
			`WideLineCol { line: line_col.line, col: col as u32 }`
extend selection via LSP 2018-08-10 19:23:17 +00:00			`}`

Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {`
			`let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);`
Make utf8 default, implement utf16 in terms of it 2021-02-12 19:09:53 +00:00			`LineCol { line: line_col.line, col: col.into() }`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`}`

Ensure that semantic tokens are single-line 2020-02-27 13:54:31 +00:00			`pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {`
internal: use API stabilized in 1.52 2021-05-04 11:10:49 +00:00			`let lo = self.newlines.partition_point(\|&it\| it < range.start());`
			`let hi = self.newlines.partition_point(\|&it\| it <= range.end());`
Ensure that semantic tokens are single-line 2020-02-27 13:54:31 +00:00			`let all = iter::once(range.start())`
			`.chain(self.newlines[lo..hi].iter().copied())`
			`.chain(iter::once(range.end()));`

			`all.clone()`
			`.zip(all.skip(1))`
Convert code to text-size 2020-04-24 21:40:41 +00:00			`.map(\|(lo, hi)\| TextRange::new(lo, hi))`
Ensure that semantic tokens are single-line 2020-02-27 13:54:31 +00:00			`.filter(\|it\| !it.is_empty())`
			`}`

Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize {`
Cleanups 2020-04-24 22:17:50 +00:00			`let mut res: usize = col.into();`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`if let Some(wide_chars) = self.line_wide_chars.get(&line) {`
			`for c in wide_chars {`
Cleanups 2020-04-24 22:17:50 +00:00			`if c.end <= col {`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`res -= usize::from(c.len()) - c.wide_len(enc);`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`} else {`
			`// From here on, all utf16 characters come after the character we are mapping,`
			`// so we don't need to take them into account`
			`break;`
			`}`
			`}`
			`}`
Cleanups 2020-04-24 22:17:50 +00:00			`res`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`}`

Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize {`
			`if let Some(wide_chars) = self.line_wide_chars.get(&line) {`
			`for c in wide_chars {`
Don't count start of non-ASCII characters as being inside of them 2020-05-03 06:54:15 +00:00			`if col > u32::from(c.start) {`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`col += u32::from(c.len()) - c.wide_len(enc) as u32;`
Support UTF-16 chars in LineIndex 2018-11-15 16:34:05 +00:00			`} else {`
			`// From here on, all utf16 characters come after the character we are mapping,`
			`// so we don't need to take them into account`
			`break;`
			`}`
			`}`
			`}`

Switch to TryFrom 2020-04-24 22:57:47 +00:00			`col.into()`
Add line index 2018-08-10 18:13:39 +00:00			`}`
			`}`

add line_index proptest 2018-12-23 13:01:36 +00:00			`#[cfg(test)]`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00			`mod tests {`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`use test_utils::skip_slow_tests;`

			`use super::WideEncoding::{Utf16, Utf32};`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00			`use super::*;`

			`#[test]`
			`fn test_line_index() {`
			`let text = "hello\nworld";`
			`let table = [`
			`(00, 0, 0),`
			`(01, 0, 1),`
			`(05, 0, 5),`
			`(06, 1, 0),`
			`(07, 1, 1),`
			`(08, 1, 2),`
			`(10, 1, 4),`
			`(11, 1, 5),`
			`(12, 1, 6),`
			`];`

			`let index = LineIndex::new(text);`
Iterate over arrays dirrectly, instead of going through a slice 2023-01-14 12:50:13 +00:00			`for (offset, line, col) in table {`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00			`assert_eq!(index.line_col(offset.into()), LineCol { line, col });`
			`}`

			`let text = "\nhello\nworld";`
			`let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];`
			`let index = LineIndex::new(text);`
Iterate over arrays dirrectly, instead of going through a slice 2023-01-14 12:50:13 +00:00			`for (offset, line, col) in table {`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00			`assert_eq!(index.line_col(offset.into()), LineCol { line, col });`
			`}`
			`}`

			`#[test]`
			`fn test_char_len() {`
			`assert_eq!('メ'.len_utf8(), 3);`
			`assert_eq!('メ'.len_utf16(), 1);`
			`}`

			`#[test]`
			`fn test_empty_index() {`
			`let col_index = LineIndex::new(`
			`"`
			`const C: char = 'x';`
			`",`
			`);`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`assert_eq!(col_index.line_wide_chars.len(), 0);`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00			`}`

			`#[test]`
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`fn test_every_chars() {`
			`if skip_slow_tests() {`
			`return;`
			`}`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`let text: String = {`
			`let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!`
			`chars.extend("\n".repeat(chars.len() / 16).chars());`
			`let mut rng = oorandom::Rand32::new(stdx::rand::seed());`
			`stdx::rand::shuffle(&mut chars, \|i\| rng.rand_range(0..i as u32) as usize);`
			`chars.into_iter().collect()`
			`};`
			`assert!(text.contains('💩')); // Sanity check.`

			`let line_index = LineIndex::new(&text);`

			`let mut lin_col = LineCol { line: 0, col: 0 };`
			`let mut col_utf16 = 0;`
			`let mut col_utf32 = 0;`
			`for (offset, c) in text.char_indices() {`
			`let got_offset = line_index.offset(lin_col).unwrap();`
			`assert_eq!(usize::from(got_offset), offset);`

			`let got_lin_col = line_index.line_col(got_offset);`
			`assert_eq!(got_lin_col, lin_col);`

			`for enc in [Utf16, Utf32] {`
			`let wide_lin_col = line_index.to_wide(enc, lin_col);`
			`let got_lin_col = line_index.to_utf8(enc, wide_lin_col);`
			`assert_eq!(got_lin_col, lin_col);`

			`let want_col = match enc {`
			`Utf16 => col_utf16,`
			`Utf32 => col_utf32,`
			`};`
			`assert_eq!(wide_lin_col.col, want_col)`
			`}`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00
Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! 2023-02-14 00:56:28 +00:00			`if c == '\n' {`
			`lin_col.line += 1;`
			`lin_col.col = 0;`
			`col_utf16 = 0;`
			`col_utf32 = 0;`
			`} else {`
			`lin_col.col += c.len_utf8() as u32;`
			`col_utf16 += c.len_utf16() as u32;`
			`col_utf32 += 1;`
			`}`
			`}`
internal: Re-arrange ide_db modules 2022-03-06 18:01:30 +00:00			`}`

			`#[test]`
			`fn test_splitlines() {`
			`fn r(lo: u32, hi: u32) -> TextRange {`
			`TextRange::new(lo.into(), hi.into())`
			`}`

			`let text = "a\nbb\nccc\n";`
			`let line_index = LineIndex::new(text);`

			`let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();`
			`let expected = vec![r(0, 2), r(2, 5), r(5, 9)];`
			`assert_eq!(actual, expected);`

			`let text = "";`
			`let line_index = LineIndex::new(text);`

			`let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();`
			`let expected = vec![];`
			`assert_eq!(actual, expected);`

			`let text = "\n";`
			`let line_index = LineIndex::new(text);`

			`let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();`
			`let expected = vec![r(0, 1)];`
			`assert_eq!(actual, expected)`
			`}`
			`}`