From 0da27376cf3768d92bcd0c094b52da50146dc70f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 14 Feb 2023 00:56:28 +0000 Subject: [PATCH 1/2] Support UTF-32 position encoding Looks like this is a native encoding for Emacs at least! --- Cargo.lock | 1 + crates/ide-db/Cargo.toml | 3 +- crates/ide-db/src/line_index.rs | 193 +++++++++--------- crates/ide/src/lib.rs | 2 +- crates/ide/src/shuffle_crate_graph.rs | 22 +- crates/rust-analyzer/src/caps.rs | 16 +- crates/rust-analyzer/src/cli/lsif.rs | 5 +- crates/rust-analyzer/src/config.rs | 8 +- .../rust-analyzer/src/diagnostics/to_proto.rs | 4 +- crates/rust-analyzer/src/from_proto.rs | 13 +- crates/rust-analyzer/src/line_index.rs | 5 +- crates/rust-analyzer/src/lsp_ext.rs | 27 ++- crates/rust-analyzer/src/lsp_utils.rs | 38 ++-- crates/rust-analyzer/src/main_loop.rs | 1 + crates/rust-analyzer/src/to_proto.rs | 6 +- crates/stdx/src/lib.rs | 1 + crates/stdx/src/rand.rs | 21 ++ docs/dev/lsp-extensions.md | 2 +- 18 files changed, 210 insertions(+), 158 deletions(-) create mode 100644 crates/stdx/src/rand.rs diff --git a/Cargo.lock b/Cargo.lock index ef0316f30f..6d8a3eeb73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -711,6 +711,7 @@ dependencies = [ "limit", "memchr", "once_cell", + "oorandom", "parser", "profile", "rayon", diff --git a/crates/ide-db/Cargo.toml b/crates/ide-db/Cargo.toml index 9672bb9b7b..57daaf623d 100644 --- a/crates/ide-db/Cargo.toml +++ b/crates/ide-db/Cargo.toml @@ -37,8 +37,9 @@ text-edit.workspace = true hir.workspace = true [dev-dependencies] -xshell = "0.2.2" expect-test = "1.4.0" +oorandom = "11.1.3" +xshell = "0.2.2" # local deps test-utils.workspace = true diff --git a/crates/ide-db/src/line_index.rs b/crates/ide-db/src/line_index.rs index 8f12ab3340..c17ca95f5d 100644 --- a/crates/ide-db/src/line_index.rs +++ b/crates/ide-db/src/line_index.rs @@ -7,20 +7,13 @@ use syntax::{TextRange, TextSize}; #[derive(Clone, Debug, PartialEq, Eq)] pub struct LineIndex { - /// Offset the the beginning of each line, zero-based + /// Offset the the beginning of each line, zero-based. pub(crate) newlines: Vec, - /// List of non-ASCII characters on each line - pub(crate) utf16_lines: NoHashHashMap>, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct LineColUtf16 { - /// Zero-based - pub line: u32, - /// Zero-based - pub col: u32, + /// List of non-ASCII characters on each line. + pub(crate) line_wide_chars: NoHashHashMap>, } +/// Line/Column information in native, utf8 format. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct LineCol { /// Zero-based @@ -29,34 +22,57 @@ pub struct LineCol { pub col: u32, } +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum WideEncoding { + Utf16, + Utf32, +} + +/// Line/Column information in legacy encodings. +/// +/// Deliberately not a generic type and different from `LineCol`. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct WideLineCol { + /// Zero-based + pub line: u32, + /// Zero-based + pub col: u32, +} + #[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub(crate) struct Utf16Char { +pub(crate) struct WideChar { /// Start offset of a character inside a line, zero-based pub(crate) start: TextSize, /// End offset of a character inside a line, zero-based pub(crate) end: TextSize, } -impl Utf16Char { +impl WideChar { /// Returns the length in 8-bit UTF-8 code units. fn len(&self) -> TextSize { self.end - self.start } - /// Returns the length in 16-bit UTF-16 code units. - fn len_utf16(&self) -> usize { - if self.len() == TextSize::from(4) { - 2 - } else { - 1 + /// Returns the length in UTF-16 or UTF-32 code units. + fn wide_len(&self, enc: WideEncoding) -> usize { + match enc { + WideEncoding::Utf16 => { + if self.len() == TextSize::from(4) { + 2 + } else { + 1 + } + } + + WideEncoding::Utf32 => 1, } } } impl LineIndex { pub fn new(text: &str) -> LineIndex { - let mut utf16_lines = NoHashHashMap::default(); - let mut utf16_chars = Vec::new(); + let mut line_wide_chars = NoHashHashMap::default(); + let mut wide_chars = Vec::new(); let mut newlines = Vec::with_capacity(16); newlines.push(TextSize::from(0)); @@ -71,8 +87,8 @@ impl LineIndex { newlines.push(curr_row); // Save any utf-16 characters seen in the previous line - if !utf16_chars.is_empty() { - utf16_lines.insert(line, mem::take(&mut utf16_chars)); + if !wide_chars.is_empty() { + line_wide_chars.insert(line, mem::take(&mut wide_chars)); } // Prepare for processing the next line @@ -82,18 +98,18 @@ impl LineIndex { } if !c.is_ascii() { - utf16_chars.push(Utf16Char { start: curr_col, end: curr_col + c_len }); + wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len }); } curr_col += c_len; } // Save any utf-16 characters seen in the last line - if !utf16_chars.is_empty() { - utf16_lines.insert(line, utf16_chars); + if !wide_chars.is_empty() { + line_wide_chars.insert(line, wide_chars); } - LineIndex { newlines, utf16_lines } + LineIndex { newlines, line_wide_chars } } pub fn line_col(&self, offset: TextSize) -> LineCol { @@ -109,13 +125,13 @@ impl LineIndex { .map(|offset| offset + TextSize::from(line_col.col)) } - pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 { - let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into()); - LineColUtf16 { line: line_col.line, col: col as u32 } + pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol { + let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into()); + WideLineCol { line: line_col.line, col: col as u32 } } - pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol { - let col = self.utf16_to_utf8_col(line_col.line, line_col.col); + pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol { + let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col); LineCol { line: line_col.line, col: col.into() } } @@ -132,12 +148,12 @@ impl LineIndex { .filter(|it| !it.is_empty()) } - fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize { + fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize { let mut res: usize = col.into(); - if let Some(utf16_chars) = self.utf16_lines.get(&line) { - for c in utf16_chars { + if let Some(wide_chars) = self.line_wide_chars.get(&line) { + for c in wide_chars { if c.end <= col { - res -= usize::from(c.len()) - c.len_utf16(); + res -= usize::from(c.len()) - c.wide_len(enc); } else { // From here on, all utf16 characters come *after* the character we are mapping, // so we don't need to take them into account @@ -148,11 +164,11 @@ impl LineIndex { res } - fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { - if let Some(utf16_chars) = self.utf16_lines.get(&line) { - for c in utf16_chars { + fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize { + if let Some(wide_chars) = self.line_wide_chars.get(&line) { + for c in wide_chars { if col > u32::from(c.start) { - col += u32::from(c.len()) - c.len_utf16() as u32; + col += u32::from(c.len()) - c.wide_len(enc) as u32; } else { // From here on, all utf16 characters come *after* the character we are mapping, // so we don't need to take them into account @@ -167,6 +183,9 @@ impl LineIndex { #[cfg(test)] mod tests { + use test_utils::skip_slow_tests; + + use super::WideEncoding::{Utf16, Utf32}; use super::*; #[test] @@ -210,67 +229,59 @@ mod tests { const C: char = 'x'; ", ); - assert_eq!(col_index.utf16_lines.len(), 0); + assert_eq!(col_index.line_wide_chars.len(), 0); } #[test] - fn test_single_char() { - let col_index = LineIndex::new( - " -const C: char = 'メ'; -", - ); + fn test_every_chars() { + if skip_slow_tests() { + return; + } - assert_eq!(col_index.utf16_lines.len(), 1); - assert_eq!(col_index.utf16_lines[&1].len(), 1); - assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() }); + let text: String = { + let mut chars: Vec = ((0 as char)..char::MAX).collect(); // Neat! + chars.extend("\n".repeat(chars.len() / 16).chars()); + let mut rng = oorandom::Rand32::new(stdx::rand::seed()); + stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize); + chars.into_iter().collect() + }; + assert!(text.contains('💩')); // Sanity check. - // UTF-8 to UTF-16, no changes - assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15); + let line_index = LineIndex::new(&text); - // UTF-8 to UTF-16 - assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20); + let mut lin_col = LineCol { line: 0, col: 0 }; + let mut col_utf16 = 0; + let mut col_utf32 = 0; + for (offset, c) in text.char_indices() { + let got_offset = line_index.offset(lin_col).unwrap(); + assert_eq!(usize::from(got_offset), offset); - // UTF-16 to UTF-8, no changes - assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); + let got_lin_col = line_index.line_col(got_offset); + assert_eq!(got_lin_col, lin_col); - // UTF-16 to UTF-8 - assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); + for enc in [Utf16, Utf32] { + let wide_lin_col = line_index.to_wide(enc, lin_col); + let got_lin_col = line_index.to_utf8(enc, wide_lin_col); + assert_eq!(got_lin_col, lin_col); - let col_index = LineIndex::new("a𐐏b"); - assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5)); - } + let want_col = match enc { + Utf16 => col_utf16, + Utf32 => col_utf32, + }; + assert_eq!(wide_lin_col.col, want_col) + } - #[test] - fn test_string() { - let col_index = LineIndex::new( - " -const C: char = \"メ メ\"; -", - ); - - assert_eq!(col_index.utf16_lines.len(), 1); - assert_eq!(col_index.utf16_lines[&1].len(), 2); - assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() }); - assert_eq!(col_index.utf16_lines[&1][1], Utf16Char { start: 21.into(), end: 24.into() }); - - // UTF-8 to UTF-16 - assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15); - - assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19); - assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21); - - assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15); - - // UTF-16 to UTF-8 - assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); - - // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1 - assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20 - assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space - assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24 - - assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); + if c == '\n' { + lin_col.line += 1; + lin_col.col = 0; + col_utf16 = 0; + col_utf32 = 0; + } else { + lin_col.col += c.len_utf8() as u32; + col_utf16 += c.len_utf16() as u32; + col_utf32 += 1; + } + } } #[test] diff --git a/crates/ide/src/lib.rs b/crates/ide/src/lib.rs index 4ead9d4d0a..f2b535bdc7 100644 --- a/crates/ide/src/lib.rs +++ b/crates/ide/src/lib.rs @@ -115,7 +115,7 @@ pub use ide_db::{ SourceRoot, SourceRootId, }, label::Label, - line_index::{LineCol, LineColUtf16, LineIndex}, + line_index::{LineCol, LineIndex}, search::{ReferenceCategory, SearchScope}, source_change::{FileSystemEdit, SourceChange}, symbol_index::Query, diff --git a/crates/ide/src/shuffle_crate_graph.rs b/crates/ide/src/shuffle_crate_graph.rs index ae539a5d39..e606072a82 100644 --- a/crates/ide/src/shuffle_crate_graph.rs +++ b/crates/ide/src/shuffle_crate_graph.rs @@ -18,7 +18,9 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) { let crate_graph = db.crate_graph(); let mut shuffled_ids = crate_graph.iter().collect::>(); - shuffle(&mut shuffled_ids); + + let mut rng = oorandom::Rand32::new(stdx::rand::seed()); + stdx::rand::shuffle(&mut shuffled_ids, |i| rng.rand_range(0..i as u32) as usize); let mut new_graph = CrateGraph::default(); @@ -52,21 +54,3 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) { db.set_crate_graph_with_durability(Arc::new(new_graph), Durability::HIGH); } - -fn shuffle(slice: &mut [T]) { - let mut rng = oorandom::Rand32::new(seed()); - - let mut remaining = slice.len() - 1; - while remaining > 0 { - let index = rng.rand_range(0..remaining as u32); - slice.swap(remaining, index as usize); - remaining -= 1; - } -} - -fn seed() -> u64 { - use std::collections::hash_map::RandomState; - use std::hash::{BuildHasher, Hasher}; - - RandomState::new().build_hasher().finish() -} diff --git a/crates/rust-analyzer/src/caps.rs b/crates/rust-analyzer/src/caps.rs index 841861635c..a9ed05021d 100644 --- a/crates/rust-analyzer/src/caps.rs +++ b/crates/rust-analyzer/src/caps.rs @@ -1,4 +1,5 @@ //! Advertises the capabilities of the LSP Server. +use ide_db::line_index::WideEncoding; use lsp_types::{ CallHierarchyServerCapability, ClientCapabilities, CodeActionKind, CodeActionOptions, CodeActionProviderCapability, CodeLensOptions, CompletionOptions, @@ -16,16 +17,19 @@ use lsp_types::{ use serde_json::json; use crate::config::{Config, RustfmtConfig}; -use crate::lsp_ext::supports_utf8; +use crate::line_index::PositionEncoding; +use crate::lsp_ext::negotiated_encoding; use crate::semantic_tokens; pub fn server_capabilities(config: &Config) -> ServerCapabilities { ServerCapabilities { - position_encoding: if supports_utf8(config.caps()) { - Some(PositionEncodingKind::UTF8) - } else { - None - }, + position_encoding: Some(match negotiated_encoding(config.caps()) { + PositionEncoding::Utf8 => PositionEncodingKind::UTF8, + PositionEncoding::Wide(wide) => match wide { + WideEncoding::Utf16 => PositionEncodingKind::UTF16, + WideEncoding::Utf32 => PositionEncodingKind::UTF32, + }, + }), text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions { open_close: Some(true), change: Some(TextDocumentSyncKind::INCREMENTAL), diff --git a/crates/rust-analyzer/src/cli/lsif.rs b/crates/rust-analyzer/src/cli/lsif.rs index 60a7f99ccd..3fc1aa4eae 100644 --- a/crates/rust-analyzer/src/cli/lsif.rs +++ b/crates/rust-analyzer/src/cli/lsif.rs @@ -11,6 +11,7 @@ use ide::{ use ide_db::LineIndexDatabase; use ide_db::base_db::salsa::{self, ParallelDatabase}; +use ide_db::line_index::WideEncoding; use lsp_types::{self, lsif}; use project_model::{CargoConfig, ProjectManifest, ProjectWorkspace}; use vfs::{AbsPathBuf, Vfs}; @@ -127,7 +128,7 @@ impl LsifManager<'_> { let line_index = self.db.line_index(file_id); let line_index = LineIndex { index: line_index, - encoding: PositionEncoding::Utf16, + encoding: PositionEncoding::Wide(WideEncoding::Utf16), endings: LineEndings::Unix, }; let range_id = self.add_vertex(lsif::Vertex::Range { @@ -249,7 +250,7 @@ impl LsifManager<'_> { let line_index = self.db.line_index(file_id); let line_index = LineIndex { index: line_index, - encoding: PositionEncoding::Utf16, + encoding: PositionEncoding::Wide(WideEncoding::Utf16), endings: LineEndings::Unix, }; let result = folds diff --git a/crates/rust-analyzer/src/config.rs b/crates/rust-analyzer/src/config.rs index be09938c2c..f609a50a05 100644 --- a/crates/rust-analyzer/src/config.rs +++ b/crates/rust-analyzer/src/config.rs @@ -33,7 +33,7 @@ use crate::{ caps::completion_item_edit_resolve, diagnostics::DiagnosticsMapConfig, line_index::PositionEncoding, - lsp_ext::{self, supports_utf8, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope}, + lsp_ext::{self, negotiated_encoding, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope}, }; mod patch_old_style; @@ -999,11 +999,7 @@ impl Config { } pub fn position_encoding(&self) -> PositionEncoding { - if supports_utf8(&self.caps) { - PositionEncoding::Utf8 - } else { - PositionEncoding::Utf16 - } + negotiated_encoding(&self.caps) } fn experimental(&self, index: &'static str) -> bool { diff --git a/crates/rust-analyzer/src/diagnostics/to_proto.rs b/crates/rust-analyzer/src/diagnostics/to_proto.rs index 55b89019b4..415fa4e02f 100644 --- a/crates/rust-analyzer/src/diagnostics/to_proto.rs +++ b/crates/rust-analyzer/src/diagnostics/to_proto.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan}; +use ide_db::line_index::WideEncoding; use itertools::Itertools; use stdx::format_to; use vfs::{AbsPath, AbsPathBuf}; @@ -95,7 +96,8 @@ fn position( let mut char_offset = 0; let len_func = match position_encoding { PositionEncoding::Utf8 => char::len_utf8, - PositionEncoding::Utf16 => char::len_utf16, + PositionEncoding::Wide(WideEncoding::Utf16) => char::len_utf16, + PositionEncoding::Wide(WideEncoding::Utf32) => |_| 1, }; for c in line.text.chars() { char_offset += 1; diff --git a/crates/rust-analyzer/src/from_proto.rs b/crates/rust-analyzer/src/from_proto.rs index 2dbb14fcd9..50af38cd6f 100644 --- a/crates/rust-analyzer/src/from_proto.rs +++ b/crates/rust-analyzer/src/from_proto.rs @@ -1,7 +1,10 @@ //! Conversion lsp_types types to rust-analyzer specific ones. use anyhow::format_err; -use ide::{Annotation, AnnotationKind, AssistKind, LineCol, LineColUtf16}; -use ide_db::base_db::{FileId, FilePosition, FileRange}; +use ide::{Annotation, AnnotationKind, AssistKind, LineCol}; +use ide_db::{ + base_db::{FileId, FilePosition, FileRange}, + line_index::WideLineCol, +}; use syntax::{TextRange, TextSize}; use vfs::AbsPathBuf; @@ -26,9 +29,9 @@ pub(crate) fn vfs_path(url: &lsp_types::Url) -> Result { pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> Result { let line_col = match line_index.encoding { PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character }, - PositionEncoding::Utf16 => { - let line_col = LineColUtf16 { line: position.line, col: position.character }; - line_index.index.to_utf8(line_col) + PositionEncoding::Wide(enc) => { + let line_col = WideLineCol { line: position.line, col: position.character }; + line_index.index.to_utf8(enc, line_col) } }; let text_size = diff --git a/crates/rust-analyzer/src/line_index.rs b/crates/rust-analyzer/src/line_index.rs index 2945dba12f..791cd931d4 100644 --- a/crates/rust-analyzer/src/line_index.rs +++ b/crates/rust-analyzer/src/line_index.rs @@ -7,9 +7,12 @@ use std::sync::Arc; +use ide_db::line_index::WideEncoding; + +#[derive(Clone, Copy)] pub enum PositionEncoding { Utf8, - Utf16, + Wide(WideEncoding), } pub(crate) struct LineIndex { diff --git a/crates/rust-analyzer/src/lsp_ext.rs b/crates/rust-analyzer/src/lsp_ext.rs index 08b2c837de..e33589cc53 100644 --- a/crates/rust-analyzer/src/lsp_ext.rs +++ b/crates/rust-analyzer/src/lsp_ext.rs @@ -2,6 +2,7 @@ use std::{collections::HashMap, path::PathBuf}; +use ide_db::line_index::WideEncoding; use lsp_types::request::Request; use lsp_types::PositionEncodingKind; use lsp_types::{ @@ -10,6 +11,8 @@ use lsp_types::{ }; use serde::{Deserialize, Serialize}; +use crate::line_index::PositionEncoding; + pub enum AnalyzerStatus {} impl Request for AnalyzerStatus { @@ -481,16 +484,22 @@ pub(crate) enum CodeLensResolveData { References(lsp_types::TextDocumentPositionParams), } -pub fn supports_utf8(caps: &lsp_types::ClientCapabilities) -> bool { - match &caps.general { - Some(general) => general - .position_encodings - .as_deref() - .unwrap_or_default() - .iter() - .any(|it| it == &PositionEncodingKind::UTF8), - _ => false, +pub fn negotiated_encoding(caps: &lsp_types::ClientCapabilities) -> PositionEncoding { + let client_encodings = match &caps.general { + Some(general) => general.position_encodings.as_deref().unwrap_or_default(), + None => &[], + }; + + for enc in client_encodings { + if enc == &PositionEncodingKind::UTF8 { + return PositionEncoding::Utf8; + } else if enc == &PositionEncodingKind::UTF32 { + return PositionEncoding::Wide(WideEncoding::Utf32); + } + // NB: intentionally prefer just about anything else to utf-16. } + + PositionEncoding::Wide(WideEncoding::Utf16) } pub enum MoveItem {} diff --git a/crates/rust-analyzer/src/lsp_utils.rs b/crates/rust-analyzer/src/lsp_utils.rs index baa77a005e..30f1c53c19 100644 --- a/crates/rust-analyzer/src/lsp_utils.rs +++ b/crates/rust-analyzer/src/lsp_utils.rs @@ -161,6 +161,7 @@ impl GlobalState { } pub(crate) fn apply_document_changes( + encoding: PositionEncoding, file_contents: impl FnOnce() -> String, mut content_changes: Vec, ) -> String { @@ -192,9 +193,9 @@ pub(crate) fn apply_document_changes( let mut line_index = LineIndex { // the index will be overwritten in the bottom loop's first iteration index: Arc::new(ide::LineIndex::new(&text)), - // We don't care about line endings or offset encoding here. + // We don't care about line endings here. endings: LineEndings::Unix, - encoding: PositionEncoding::Utf16, + encoding, }; // The changes we got must be applied sequentially, but can cross lines so we @@ -256,6 +257,7 @@ pub(crate) fn all_edits_are_disjoint( #[cfg(test)] mod tests { + use ide_db::line_index::WideEncoding; use lsp_types::{ CompletionItem, CompletionTextEdit, InsertReplaceEdit, Position, Range, TextDocumentContentChangeEvent, @@ -278,9 +280,11 @@ mod tests { }; } - let text = apply_document_changes(|| String::new(), vec![]); + let encoding = PositionEncoding::Wide(WideEncoding::Utf16); + let text = apply_document_changes(encoding, || String::new(), vec![]); assert_eq!(text, ""); let text = apply_document_changes( + encoding, || text, vec![TextDocumentContentChangeEvent { range: None, @@ -289,39 +293,49 @@ mod tests { }], ); assert_eq!(text, "the"); - let text = apply_document_changes(|| text, c![0, 3; 0, 3 => " quick"]); + let text = apply_document_changes(encoding, || text, c![0, 3; 0, 3 => " quick"]); assert_eq!(text, "the quick"); - let text = apply_document_changes(|| text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]); + let text = + apply_document_changes(encoding, || text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]); assert_eq!(text, "quick foxes"); - let text = apply_document_changes(|| text, c![0, 11; 0, 11 => "\ndream"]); + let text = apply_document_changes(encoding, || text, c![0, 11; 0, 11 => "\ndream"]); assert_eq!(text, "quick foxes\ndream"); - let text = apply_document_changes(|| text, c![1, 0; 1, 0 => "have "]); + let text = apply_document_changes(encoding, || text, c![1, 0; 1, 0 => "have "]); assert_eq!(text, "quick foxes\nhave dream"); let text = apply_document_changes( + encoding, || text, c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"], ); assert_eq!(text, "the quick foxes\nhave quiet dreams\n"); - let text = apply_document_changes(|| text, c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"]); + let text = apply_document_changes( + encoding, + || text, + c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"], + ); assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n"); let text = apply_document_changes( + encoding, || text, c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"], ); assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n"); - let text = apply_document_changes(|| text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]); + let text = + apply_document_changes(encoding, || text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]); assert_eq!(text, "the quick \nthey have quiet dreams\n"); let text = String::from("❤️"); - let text = apply_document_changes(|| text, c![0, 0; 0, 0 => "a"]); + let text = apply_document_changes(encoding, || text, c![0, 0; 0, 0 => "a"]); assert_eq!(text, "a❤️"); let text = String::from("a\nb"); - let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]); + let text = + apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]); assert_eq!(text, "adcb"); let text = String::from("a\nb"); - let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]); + let text = + apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]); assert_eq!(text, "ațc\ncb"); } diff --git a/crates/rust-analyzer/src/main_loop.rs b/crates/rust-analyzer/src/main_loop.rs index 346a74e270..d1e38b33c7 100644 --- a/crates/rust-analyzer/src/main_loop.rs +++ b/crates/rust-analyzer/src/main_loop.rs @@ -831,6 +831,7 @@ impl GlobalState { let vfs = &mut this.vfs.write().0; let file_id = vfs.file_id(&path).unwrap(); let text = apply_document_changes( + this.config.position_encoding(), || std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into(), params.content_changes, ); diff --git a/crates/rust-analyzer/src/to_proto.rs b/crates/rust-analyzer/src/to_proto.rs index 5bdc1bf8d9..78cd4b8e2b 100644 --- a/crates/rust-analyzer/src/to_proto.rs +++ b/crates/rust-analyzer/src/to_proto.rs @@ -31,8 +31,8 @@ pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp_types::P let line_col = line_index.index.line_col(offset); match line_index.encoding { PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col), - PositionEncoding::Utf16 => { - let line_col = line_index.index.to_utf16(line_col); + PositionEncoding::Wide(enc) => { + let line_col = line_index.index.to_wide(enc, line_col); lsp_types::Position::new(line_col.line, line_col.col) } } @@ -1429,7 +1429,7 @@ fn main() { let line_index = LineIndex { index: Arc::new(ide::LineIndex::new(text)), endings: LineEndings::Unix, - encoding: PositionEncoding::Utf16, + encoding: PositionEncoding::Utf8, }; let converted: Vec = folds.into_iter().map(|it| folding_range(text, &line_index, true, it)).collect(); diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs index bd24d7d28b..5639aaf57c 100644 --- a/crates/stdx/src/lib.rs +++ b/crates/stdx/src/lib.rs @@ -11,6 +11,7 @@ pub mod hash; pub mod process; pub mod panic_context; pub mod non_empty_vec; +pub mod rand; pub use always_assert::{always, never}; diff --git a/crates/stdx/src/rand.rs b/crates/stdx/src/rand.rs new file mode 100644 index 0000000000..b38506caef --- /dev/null +++ b/crates/stdx/src/rand.rs @@ -0,0 +1,21 @@ +//! We don't use `rand`, as that's too many things for us. +//! +//! Currently, we use oorandom instead, but it misses these two utilities. +//! Perhaps we should switch to `fastrand`, or our own small prng, it's not like +//! we need anything move complicatied that xor-shift. + +pub fn shuffle(slice: &mut [T], mut rand_index: impl FnMut(usize) -> usize) { + let mut remaining = slice.len() - 1; + while remaining > 0 { + let index = rand_index(remaining); + slice.swap(remaining, index); + remaining -= 1; + } +} + +pub fn seed() -> u64 { + use std::collections::hash_map::RandomState; + use std::hash::{BuildHasher, Hasher}; + + RandomState::new().build_hasher().finish() +} diff --git a/docs/dev/lsp-extensions.md b/docs/dev/lsp-extensions.md index a794e86618..c3623a5cc4 100644 --- a/docs/dev/lsp-extensions.md +++ b/docs/dev/lsp-extensions.md @@ -1,5 +1,5 @@