Auto merge of #14141 - matklad:utf-32, r=lnicola

Support UTF-32 position encoding

Looks like this is a native encoding for Emacs at least!
This commit is contained in:
bors 2023-02-14 10:53:35 +00:00
commit 31486a639d
18 changed files with 210 additions and 158 deletions

1
Cargo.lock generated
View file

@ -711,6 +711,7 @@ dependencies = [
"limit", "limit",
"memchr", "memchr",
"once_cell", "once_cell",
"oorandom",
"parser", "parser",
"profile", "profile",
"rayon", "rayon",

View file

@ -37,8 +37,9 @@ text-edit.workspace = true
hir.workspace = true hir.workspace = true
[dev-dependencies] [dev-dependencies]
xshell = "0.2.2"
expect-test = "1.4.0" expect-test = "1.4.0"
oorandom = "11.1.3"
xshell = "0.2.2"
# local deps # local deps
test-utils.workspace = true test-utils.workspace = true

View file

@ -7,20 +7,13 @@ use syntax::{TextRange, TextSize};
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct LineIndex { pub struct LineIndex {
/// Offset the the beginning of each line, zero-based /// Offset the beginning of each line, zero-based.
pub(crate) newlines: Vec<TextSize>, pub(crate) newlines: Vec<TextSize>,
/// List of non-ASCII characters on each line /// List of non-ASCII characters on each line.
pub(crate) utf16_lines: NoHashHashMap<u32, Vec<Utf16Char>>, pub(crate) line_wide_chars: NoHashHashMap<u32, Vec<WideChar>>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineColUtf16 {
/// Zero-based
pub line: u32,
/// Zero-based
pub col: u32,
} }
/// Line/Column information in native, utf8 format.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol { pub struct LineCol {
/// Zero-based /// Zero-based
@ -29,34 +22,57 @@ pub struct LineCol {
pub col: u32, pub col: u32,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum WideEncoding {
Utf16,
Utf32,
}
/// Line/Column information in legacy encodings.
///
/// Deliberately not a generic type and different from `LineCol`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct WideLineCol {
/// Zero-based
pub line: u32,
/// Zero-based
pub col: u32,
}
#[derive(Clone, Debug, Hash, PartialEq, Eq)] #[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub(crate) struct Utf16Char { pub(crate) struct WideChar {
/// Start offset of a character inside a line, zero-based /// Start offset of a character inside a line, zero-based
pub(crate) start: TextSize, pub(crate) start: TextSize,
/// End offset of a character inside a line, zero-based /// End offset of a character inside a line, zero-based
pub(crate) end: TextSize, pub(crate) end: TextSize,
} }
impl Utf16Char { impl WideChar {
/// Returns the length in 8-bit UTF-8 code units. /// Returns the length in 8-bit UTF-8 code units.
fn len(&self) -> TextSize { fn len(&self) -> TextSize {
self.end - self.start self.end - self.start
} }
/// Returns the length in 16-bit UTF-16 code units. /// Returns the length in UTF-16 or UTF-32 code units.
fn len_utf16(&self) -> usize { fn wide_len(&self, enc: WideEncoding) -> usize {
match enc {
WideEncoding::Utf16 => {
if self.len() == TextSize::from(4) { if self.len() == TextSize::from(4) {
2 2
} else { } else {
1 1
} }
} }
WideEncoding::Utf32 => 1,
}
}
} }
impl LineIndex { impl LineIndex {
pub fn new(text: &str) -> LineIndex { pub fn new(text: &str) -> LineIndex {
let mut utf16_lines = NoHashHashMap::default(); let mut line_wide_chars = NoHashHashMap::default();
let mut utf16_chars = Vec::new(); let mut wide_chars = Vec::new();
let mut newlines = Vec::with_capacity(16); let mut newlines = Vec::with_capacity(16);
newlines.push(TextSize::from(0)); newlines.push(TextSize::from(0));
@ -71,8 +87,8 @@ impl LineIndex {
newlines.push(curr_row); newlines.push(curr_row);
// Save any utf-16 characters seen in the previous line // Save any utf-16 characters seen in the previous line
if !utf16_chars.is_empty() { if !wide_chars.is_empty() {
utf16_lines.insert(line, mem::take(&mut utf16_chars)); line_wide_chars.insert(line, mem::take(&mut wide_chars));
} }
// Prepare for processing the next line // Prepare for processing the next line
@ -82,18 +98,18 @@ impl LineIndex {
} }
if !c.is_ascii() { if !c.is_ascii() {
utf16_chars.push(Utf16Char { start: curr_col, end: curr_col + c_len }); wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len });
} }
curr_col += c_len; curr_col += c_len;
} }
// Save any utf-16 characters seen in the last line // Save any utf-16 characters seen in the last line
if !utf16_chars.is_empty() { if !wide_chars.is_empty() {
utf16_lines.insert(line, utf16_chars); line_wide_chars.insert(line, wide_chars);
} }
LineIndex { newlines, utf16_lines } LineIndex { newlines, line_wide_chars }
} }
pub fn line_col(&self, offset: TextSize) -> LineCol { pub fn line_col(&self, offset: TextSize) -> LineCol {
@ -109,13 +125,13 @@ impl LineIndex {
.map(|offset| offset + TextSize::from(line_col.col)) .map(|offset| offset + TextSize::from(line_col.col))
} }
pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 { pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {
let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into()); let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());
LineColUtf16 { line: line_col.line, col: col as u32 } WideLineCol { line: line_col.line, col: col as u32 }
} }
pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol { pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {
let col = self.utf16_to_utf8_col(line_col.line, line_col.col); let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);
LineCol { line: line_col.line, col: col.into() } LineCol { line: line_col.line, col: col.into() }
} }
@ -132,12 +148,12 @@ impl LineIndex {
.filter(|it| !it.is_empty()) .filter(|it| !it.is_empty())
} }
fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize { fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize {
let mut res: usize = col.into(); let mut res: usize = col.into();
if let Some(utf16_chars) = self.utf16_lines.get(&line) { if let Some(wide_chars) = self.line_wide_chars.get(&line) {
for c in utf16_chars { for c in wide_chars {
if c.end <= col { if c.end <= col {
res -= usize::from(c.len()) - c.len_utf16(); res -= usize::from(c.len()) - c.wide_len(enc);
} else { } else {
// From here on, all utf16 characters come *after* the character we are mapping, // From here on, all utf16 characters come *after* the character we are mapping,
// so we don't need to take them into account // so we don't need to take them into account
@ -148,11 +164,11 @@ impl LineIndex {
res res
} }
fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize {
if let Some(utf16_chars) = self.utf16_lines.get(&line) { if let Some(wide_chars) = self.line_wide_chars.get(&line) {
for c in utf16_chars { for c in wide_chars {
if col > u32::from(c.start) { if col > u32::from(c.start) {
col += u32::from(c.len()) - c.len_utf16() as u32; col += u32::from(c.len()) - c.wide_len(enc) as u32;
} else { } else {
// From here on, all utf16 characters come *after* the character we are mapping, // From here on, all utf16 characters come *after* the character we are mapping,
// so we don't need to take them into account // so we don't need to take them into account
@ -167,6 +183,9 @@ impl LineIndex {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use test_utils::skip_slow_tests;
use super::WideEncoding::{Utf16, Utf32};
use super::*; use super::*;
#[test] #[test]
@ -210,67 +229,59 @@ mod tests {
const C: char = 'x'; const C: char = 'x';
", ",
); );
assert_eq!(col_index.utf16_lines.len(), 0); assert_eq!(col_index.line_wide_chars.len(), 0);
} }
#[test] #[test]
fn test_single_char() { fn test_every_chars() {
let col_index = LineIndex::new( if skip_slow_tests() {
" return;
const C: char = 'メ';
",
);
assert_eq!(col_index.utf16_lines.len(), 1);
assert_eq!(col_index.utf16_lines[&1].len(), 1);
assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
// UTF-8 to UTF-16, no changes
assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
// UTF-8 to UTF-16
assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);
// UTF-16 to UTF-8, no changes
assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
// UTF-16 to UTF-8
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21));
let col_index = LineIndex::new("a𐐏b");
assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5));
} }
#[test] let text: String = {
fn test_string() { let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
let col_index = LineIndex::new( chars.extend("\n".repeat(chars.len() / 16).chars());
" let mut rng = oorandom::Rand32::new(stdx::rand::seed());
const C: char = \"メ メ\"; stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
", chars.into_iter().collect()
); };
assert!(text.contains('💩')); // Sanity check.
assert_eq!(col_index.utf16_lines.len(), 1); let line_index = LineIndex::new(&text);
assert_eq!(col_index.utf16_lines[&1].len(), 2);
assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
assert_eq!(col_index.utf16_lines[&1][1], Utf16Char { start: 21.into(), end: 24.into() });
// UTF-8 to UTF-16 let mut lin_col = LineCol { line: 0, col: 0 };
assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15); let mut col_utf16 = 0;
let mut col_utf32 = 0;
for (offset, c) in text.char_indices() {
let got_offset = line_index.offset(lin_col).unwrap();
assert_eq!(usize::from(got_offset), offset);
assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19); let got_lin_col = line_index.line_col(got_offset);
assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21); assert_eq!(got_lin_col, lin_col);
assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15); for enc in [Utf16, Utf32] {
let wide_lin_col = line_index.to_wide(enc, lin_col);
let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
assert_eq!(got_lin_col, lin_col);
// UTF-16 to UTF-8 let want_col = match enc {
assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); Utf16 => col_utf16,
Utf32 => col_utf32,
};
assert_eq!(wide_lin_col.col, want_col)
}
// メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1 if c == '\n' {
assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20 lin_col.line += 1;
assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space lin_col.col = 0;
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24 col_utf16 = 0;
col_utf32 = 0;
assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); } else {
lin_col.col += c.len_utf8() as u32;
col_utf16 += c.len_utf16() as u32;
col_utf32 += 1;
}
}
} }
#[test] #[test]

View file

@ -115,7 +115,7 @@ pub use ide_db::{
SourceRoot, SourceRootId, SourceRoot, SourceRootId,
}, },
label::Label, label::Label,
line_index::{LineCol, LineColUtf16, LineIndex}, line_index::{LineCol, LineIndex},
search::{ReferenceCategory, SearchScope}, search::{ReferenceCategory, SearchScope},
source_change::{FileSystemEdit, SourceChange}, source_change::{FileSystemEdit, SourceChange},
symbol_index::Query, symbol_index::Query,

View file

@ -18,7 +18,9 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) {
let crate_graph = db.crate_graph(); let crate_graph = db.crate_graph();
let mut shuffled_ids = crate_graph.iter().collect::<Vec<_>>(); let mut shuffled_ids = crate_graph.iter().collect::<Vec<_>>();
shuffle(&mut shuffled_ids);
let mut rng = oorandom::Rand32::new(stdx::rand::seed());
stdx::rand::shuffle(&mut shuffled_ids, |i| rng.rand_range(0..i as u32) as usize);
let mut new_graph = CrateGraph::default(); let mut new_graph = CrateGraph::default();
@ -52,21 +54,3 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) {
db.set_crate_graph_with_durability(Arc::new(new_graph), Durability::HIGH); db.set_crate_graph_with_durability(Arc::new(new_graph), Durability::HIGH);
} }
fn shuffle<T>(slice: &mut [T]) {
let mut rng = oorandom::Rand32::new(seed());
let mut remaining = slice.len() - 1;
while remaining > 0 {
let index = rng.rand_range(0..remaining as u32);
slice.swap(remaining, index as usize);
remaining -= 1;
}
}
fn seed() -> u64 {
use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hasher};
RandomState::new().build_hasher().finish()
}

View file

@ -1,4 +1,5 @@
//! Advertises the capabilities of the LSP Server. //! Advertises the capabilities of the LSP Server.
use ide_db::line_index::WideEncoding;
use lsp_types::{ use lsp_types::{
CallHierarchyServerCapability, ClientCapabilities, CodeActionKind, CodeActionOptions, CallHierarchyServerCapability, ClientCapabilities, CodeActionKind, CodeActionOptions,
CodeActionProviderCapability, CodeLensOptions, CompletionOptions, CodeActionProviderCapability, CodeLensOptions, CompletionOptions,
@ -16,16 +17,19 @@ use lsp_types::{
use serde_json::json; use serde_json::json;
use crate::config::{Config, RustfmtConfig}; use crate::config::{Config, RustfmtConfig};
use crate::lsp_ext::supports_utf8; use crate::line_index::PositionEncoding;
use crate::lsp_ext::negotiated_encoding;
use crate::semantic_tokens; use crate::semantic_tokens;
pub fn server_capabilities(config: &Config) -> ServerCapabilities { pub fn server_capabilities(config: &Config) -> ServerCapabilities {
ServerCapabilities { ServerCapabilities {
position_encoding: if supports_utf8(config.caps()) { position_encoding: Some(match negotiated_encoding(config.caps()) {
Some(PositionEncodingKind::UTF8) PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
} else { PositionEncoding::Wide(wide) => match wide {
None WideEncoding::Utf16 => PositionEncodingKind::UTF16,
WideEncoding::Utf32 => PositionEncodingKind::UTF32,
}, },
}),
text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions { text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions {
open_close: Some(true), open_close: Some(true),
change: Some(TextDocumentSyncKind::INCREMENTAL), change: Some(TextDocumentSyncKind::INCREMENTAL),

View file

@ -11,6 +11,7 @@ use ide::{
use ide_db::LineIndexDatabase; use ide_db::LineIndexDatabase;
use ide_db::base_db::salsa::{self, ParallelDatabase}; use ide_db::base_db::salsa::{self, ParallelDatabase};
use ide_db::line_index::WideEncoding;
use lsp_types::{self, lsif}; use lsp_types::{self, lsif};
use project_model::{CargoConfig, ProjectManifest, ProjectWorkspace}; use project_model::{CargoConfig, ProjectManifest, ProjectWorkspace};
use vfs::{AbsPathBuf, Vfs}; use vfs::{AbsPathBuf, Vfs};
@ -127,7 +128,7 @@ impl LsifManager<'_> {
let line_index = self.db.line_index(file_id); let line_index = self.db.line_index(file_id);
let line_index = LineIndex { let line_index = LineIndex {
index: line_index, index: line_index,
encoding: PositionEncoding::Utf16, encoding: PositionEncoding::Wide(WideEncoding::Utf16),
endings: LineEndings::Unix, endings: LineEndings::Unix,
}; };
let range_id = self.add_vertex(lsif::Vertex::Range { let range_id = self.add_vertex(lsif::Vertex::Range {
@ -249,7 +250,7 @@ impl LsifManager<'_> {
let line_index = self.db.line_index(file_id); let line_index = self.db.line_index(file_id);
let line_index = LineIndex { let line_index = LineIndex {
index: line_index, index: line_index,
encoding: PositionEncoding::Utf16, encoding: PositionEncoding::Wide(WideEncoding::Utf16),
endings: LineEndings::Unix, endings: LineEndings::Unix,
}; };
let result = folds let result = folds

View file

@ -33,7 +33,7 @@ use crate::{
caps::completion_item_edit_resolve, caps::completion_item_edit_resolve,
diagnostics::DiagnosticsMapConfig, diagnostics::DiagnosticsMapConfig,
line_index::PositionEncoding, line_index::PositionEncoding,
lsp_ext::{self, supports_utf8, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope}, lsp_ext::{self, negotiated_encoding, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope},
}; };
mod patch_old_style; mod patch_old_style;
@ -999,11 +999,7 @@ impl Config {
} }
pub fn position_encoding(&self) -> PositionEncoding { pub fn position_encoding(&self) -> PositionEncoding {
if supports_utf8(&self.caps) { negotiated_encoding(&self.caps)
PositionEncoding::Utf8
} else {
PositionEncoding::Utf16
}
} }
fn experimental(&self, index: &'static str) -> bool { fn experimental(&self, index: &'static str) -> bool {

View file

@ -3,6 +3,7 @@
use std::collections::HashMap; use std::collections::HashMap;
use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan}; use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan};
use ide_db::line_index::WideEncoding;
use itertools::Itertools; use itertools::Itertools;
use stdx::format_to; use stdx::format_to;
use vfs::{AbsPath, AbsPathBuf}; use vfs::{AbsPath, AbsPathBuf};
@ -95,7 +96,8 @@ fn position(
let mut char_offset = 0; let mut char_offset = 0;
let len_func = match position_encoding { let len_func = match position_encoding {
PositionEncoding::Utf8 => char::len_utf8, PositionEncoding::Utf8 => char::len_utf8,
PositionEncoding::Utf16 => char::len_utf16, PositionEncoding::Wide(WideEncoding::Utf16) => char::len_utf16,
PositionEncoding::Wide(WideEncoding::Utf32) => |_| 1,
}; };
for c in line.text.chars() { for c in line.text.chars() {
char_offset += 1; char_offset += 1;

View file

@ -1,7 +1,10 @@
//! Conversion lsp_types types to rust-analyzer specific ones. //! Conversion lsp_types types to rust-analyzer specific ones.
use anyhow::format_err; use anyhow::format_err;
use ide::{Annotation, AnnotationKind, AssistKind, LineCol, LineColUtf16}; use ide::{Annotation, AnnotationKind, AssistKind, LineCol};
use ide_db::base_db::{FileId, FilePosition, FileRange}; use ide_db::{
base_db::{FileId, FilePosition, FileRange},
line_index::WideLineCol,
};
use syntax::{TextRange, TextSize}; use syntax::{TextRange, TextSize};
use vfs::AbsPathBuf; use vfs::AbsPathBuf;
@ -26,9 +29,9 @@ pub(crate) fn vfs_path(url: &lsp_types::Url) -> Result<vfs::VfsPath> {
pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> Result<TextSize> { pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> Result<TextSize> {
let line_col = match line_index.encoding { let line_col = match line_index.encoding {
PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character }, PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character },
PositionEncoding::Utf16 => { PositionEncoding::Wide(enc) => {
let line_col = LineColUtf16 { line: position.line, col: position.character }; let line_col = WideLineCol { line: position.line, col: position.character };
line_index.index.to_utf8(line_col) line_index.index.to_utf8(enc, line_col)
} }
}; };
let text_size = let text_size =

View file

@ -7,9 +7,12 @@
use std::sync::Arc; use std::sync::Arc;
use ide_db::line_index::WideEncoding;
#[derive(Clone, Copy)]
pub enum PositionEncoding { pub enum PositionEncoding {
Utf8, Utf8,
Utf16, Wide(WideEncoding),
} }
pub(crate) struct LineIndex { pub(crate) struct LineIndex {

View file

@ -2,6 +2,7 @@
use std::{collections::HashMap, path::PathBuf}; use std::{collections::HashMap, path::PathBuf};
use ide_db::line_index::WideEncoding;
use lsp_types::request::Request; use lsp_types::request::Request;
use lsp_types::PositionEncodingKind; use lsp_types::PositionEncodingKind;
use lsp_types::{ use lsp_types::{
@ -10,6 +11,8 @@ use lsp_types::{
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::line_index::PositionEncoding;
pub enum AnalyzerStatus {} pub enum AnalyzerStatus {}
impl Request for AnalyzerStatus { impl Request for AnalyzerStatus {
@ -481,16 +484,22 @@ pub(crate) enum CodeLensResolveData {
References(lsp_types::TextDocumentPositionParams), References(lsp_types::TextDocumentPositionParams),
} }
pub fn supports_utf8(caps: &lsp_types::ClientCapabilities) -> bool { pub fn negotiated_encoding(caps: &lsp_types::ClientCapabilities) -> PositionEncoding {
match &caps.general { let client_encodings = match &caps.general {
Some(general) => general Some(general) => general.position_encodings.as_deref().unwrap_or_default(),
.position_encodings None => &[],
.as_deref() };
.unwrap_or_default()
.iter() for enc in client_encodings {
.any(|it| it == &PositionEncodingKind::UTF8), if enc == &PositionEncodingKind::UTF8 {
_ => false, return PositionEncoding::Utf8;
} else if enc == &PositionEncodingKind::UTF32 {
return PositionEncoding::Wide(WideEncoding::Utf32);
} }
// NB: intentionally prefer just about anything else to utf-16.
}
PositionEncoding::Wide(WideEncoding::Utf16)
} }
pub enum MoveItem {} pub enum MoveItem {}

View file

@ -161,6 +161,7 @@ impl GlobalState {
} }
pub(crate) fn apply_document_changes( pub(crate) fn apply_document_changes(
encoding: PositionEncoding,
file_contents: impl FnOnce() -> String, file_contents: impl FnOnce() -> String,
mut content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>, mut content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>,
) -> String { ) -> String {
@ -192,9 +193,9 @@ pub(crate) fn apply_document_changes(
let mut line_index = LineIndex { let mut line_index = LineIndex {
// the index will be overwritten in the bottom loop's first iteration // the index will be overwritten in the bottom loop's first iteration
index: Arc::new(ide::LineIndex::new(&text)), index: Arc::new(ide::LineIndex::new(&text)),
// We don't care about line endings or offset encoding here. // We don't care about line endings here.
endings: LineEndings::Unix, endings: LineEndings::Unix,
encoding: PositionEncoding::Utf16, encoding,
}; };
// The changes we got must be applied sequentially, but can cross lines so we // The changes we got must be applied sequentially, but can cross lines so we
@ -256,6 +257,7 @@ pub(crate) fn all_edits_are_disjoint(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use ide_db::line_index::WideEncoding;
use lsp_types::{ use lsp_types::{
CompletionItem, CompletionTextEdit, InsertReplaceEdit, Position, Range, CompletionItem, CompletionTextEdit, InsertReplaceEdit, Position, Range,
TextDocumentContentChangeEvent, TextDocumentContentChangeEvent,
@ -278,9 +280,11 @@ mod tests {
}; };
} }
let text = apply_document_changes(|| String::new(), vec![]); let encoding = PositionEncoding::Wide(WideEncoding::Utf16);
let text = apply_document_changes(encoding, || String::new(), vec![]);
assert_eq!(text, ""); assert_eq!(text, "");
let text = apply_document_changes( let text = apply_document_changes(
encoding,
|| text, || text,
vec![TextDocumentContentChangeEvent { vec![TextDocumentContentChangeEvent {
range: None, range: None,
@ -289,39 +293,49 @@ mod tests {
}], }],
); );
assert_eq!(text, "the"); assert_eq!(text, "the");
let text = apply_document_changes(|| text, c![0, 3; 0, 3 => " quick"]); let text = apply_document_changes(encoding, || text, c![0, 3; 0, 3 => " quick"]);
assert_eq!(text, "the quick"); assert_eq!(text, "the quick");
let text = apply_document_changes(|| text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]); let text =
apply_document_changes(encoding, || text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
assert_eq!(text, "quick foxes"); assert_eq!(text, "quick foxes");
let text = apply_document_changes(|| text, c![0, 11; 0, 11 => "\ndream"]); let text = apply_document_changes(encoding, || text, c![0, 11; 0, 11 => "\ndream"]);
assert_eq!(text, "quick foxes\ndream"); assert_eq!(text, "quick foxes\ndream");
let text = apply_document_changes(|| text, c![1, 0; 1, 0 => "have "]); let text = apply_document_changes(encoding, || text, c![1, 0; 1, 0 => "have "]);
assert_eq!(text, "quick foxes\nhave dream"); assert_eq!(text, "quick foxes\nhave dream");
let text = apply_document_changes( let text = apply_document_changes(
encoding,
|| text, || text,
c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"], c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"],
); );
assert_eq!(text, "the quick foxes\nhave quiet dreams\n"); assert_eq!(text, "the quick foxes\nhave quiet dreams\n");
let text = apply_document_changes(|| text, c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"]); let text = apply_document_changes(
encoding,
|| text,
c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"],
);
assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n"); assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n");
let text = apply_document_changes( let text = apply_document_changes(
encoding,
|| text, || text,
c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"], c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"],
); );
assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n"); assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n");
let text = apply_document_changes(|| text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]); let text =
apply_document_changes(encoding, || text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
assert_eq!(text, "the quick \nthey have quiet dreams\n"); assert_eq!(text, "the quick \nthey have quiet dreams\n");
let text = String::from("❤️"); let text = String::from("❤️");
let text = apply_document_changes(|| text, c![0, 0; 0, 0 => "a"]); let text = apply_document_changes(encoding, || text, c![0, 0; 0, 0 => "a"]);
assert_eq!(text, "a❤"); assert_eq!(text, "a❤");
let text = String::from("a\nb"); let text = String::from("a\nb");
let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]); let text =
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
assert_eq!(text, "adcb"); assert_eq!(text, "adcb");
let text = String::from("a\nb"); let text = String::from("a\nb");
let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]); let text =
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
assert_eq!(text, "ațc\ncb"); assert_eq!(text, "ațc\ncb");
} }

View file

@ -831,6 +831,7 @@ impl GlobalState {
let vfs = &mut this.vfs.write().0; let vfs = &mut this.vfs.write().0;
let file_id = vfs.file_id(&path).unwrap(); let file_id = vfs.file_id(&path).unwrap();
let text = apply_document_changes( let text = apply_document_changes(
this.config.position_encoding(),
|| std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into(), || std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into(),
params.content_changes, params.content_changes,
); );

View file

@ -31,8 +31,8 @@ pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp_types::P
let line_col = line_index.index.line_col(offset); let line_col = line_index.index.line_col(offset);
match line_index.encoding { match line_index.encoding {
PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col), PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col),
PositionEncoding::Utf16 => { PositionEncoding::Wide(enc) => {
let line_col = line_index.index.to_utf16(line_col); let line_col = line_index.index.to_wide(enc, line_col);
lsp_types::Position::new(line_col.line, line_col.col) lsp_types::Position::new(line_col.line, line_col.col)
} }
} }
@ -1429,7 +1429,7 @@ fn main() {
let line_index = LineIndex { let line_index = LineIndex {
index: Arc::new(ide::LineIndex::new(text)), index: Arc::new(ide::LineIndex::new(text)),
endings: LineEndings::Unix, endings: LineEndings::Unix,
encoding: PositionEncoding::Utf16, encoding: PositionEncoding::Utf8,
}; };
let converted: Vec<lsp_types::FoldingRange> = let converted: Vec<lsp_types::FoldingRange> =
folds.into_iter().map(|it| folding_range(text, &line_index, true, it)).collect(); folds.into_iter().map(|it| folding_range(text, &line_index, true, it)).collect();

View file

@ -11,6 +11,7 @@ pub mod hash;
pub mod process; pub mod process;
pub mod panic_context; pub mod panic_context;
pub mod non_empty_vec; pub mod non_empty_vec;
pub mod rand;
pub use always_assert::{always, never}; pub use always_assert::{always, never};

21
crates/stdx/src/rand.rs Normal file
View file

@ -0,0 +1,21 @@
//! We don't use `rand`, as that's too many things for us.
//!
//! Currently, we use `oorandom` instead, but it lacks these two utilities.
//! Perhaps we should switch to `fastrand`, or to our own small PRNG; it's not
//! like we need anything more complicated than xor-shift.
/// Shuffles `slice` in place, taking its randomness from `rand_index`.
///
/// Positions are visited from the last element down to index 1. For each
/// position `i`, `rand_index(i)` is called once and the element at `i` is
/// swapped with the element at the returned index. The returned index must be
/// in bounds for `slice`, otherwise `slice.swap` panics.
///
/// Slices with fewer than two elements are left untouched, and `rand_index`
/// is never called for them.
pub fn shuffle<T>(slice: &mut [T], mut rand_index: impl FnMut(usize) -> usize) {
    // `(1..len).rev()` is empty when `len < 2`, so an empty slice no longer
    // hits the `len - 1` underflow; for longer slices the visit order
    // (`len - 1`, `len - 2`, ..., 1) is unchanged.
    for remaining in (1..slice.len()).rev() {
        let index = rand_index(remaining);
        slice.swap(remaining, index);
    }
}
/// Returns a `u64` intended for seeding a PRNG.
///
/// The value is what a freshly built `std` `RandomState` hasher reports from
/// `finish()` without having been fed any input, so it needs nothing beyond
/// the standard library.
pub fn seed() -> u64 {
    use std::{
        collections::hash_map::RandomState,
        hash::{BuildHasher, Hasher},
    };

    let hasher = RandomState::new().build_hasher();
    hasher.finish()
}

View file

@ -1,5 +1,5 @@
<!--- <!---
lsp_ext.rs hash: ec29403e67dfd15b lsp_ext.rs hash: d87477896dfe41d4
If you need to change the above hash to make the test pass, please check if you If you need to change the above hash to make the test pass, please check if you
need to adjust this doc as well and ping this issue: need to adjust this doc as well and ping this issue: