mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-16 23:24:03 +00:00
Auto merge of #14141 - matklad:utf-32, r=lnicola
Support UTF-32 position encoding. Looks like this is a native encoding for Emacs, at least!
This commit is contained in:
commit
31486a639d
18 changed files with 210 additions and 158 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -711,6 +711,7 @@ dependencies = [
|
|||
"limit",
|
||||
"memchr",
|
||||
"once_cell",
|
||||
"oorandom",
|
||||
"parser",
|
||||
"profile",
|
||||
"rayon",
|
||||
|
|
|
@ -37,8 +37,9 @@ text-edit.workspace = true
|
|||
hir.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
xshell = "0.2.2"
|
||||
expect-test = "1.4.0"
|
||||
oorandom = "11.1.3"
|
||||
xshell = "0.2.2"
|
||||
|
||||
# local deps
|
||||
test-utils.workspace = true
|
||||
|
|
|
@ -7,20 +7,13 @@ use syntax::{TextRange, TextSize};
|
|||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct LineIndex {
|
||||
/// Offset the the beginning of each line, zero-based
|
||||
/// Offset of the beginning of each line, zero-based.
|
||||
pub(crate) newlines: Vec<TextSize>,
|
||||
/// List of non-ASCII characters on each line
|
||||
pub(crate) utf16_lines: NoHashHashMap<u32, Vec<Utf16Char>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct LineColUtf16 {
|
||||
/// Zero-based
|
||||
pub line: u32,
|
||||
/// Zero-based
|
||||
pub col: u32,
|
||||
/// List of non-ASCII characters on each line.
|
||||
pub(crate) line_wide_chars: NoHashHashMap<u32, Vec<WideChar>>,
|
||||
}
|
||||
|
||||
/// Line/Column information in native, utf8 format.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct LineCol {
|
||||
/// Zero-based
|
||||
|
@ -29,34 +22,57 @@ pub struct LineCol {
|
|||
pub col: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum WideEncoding {
|
||||
Utf16,
|
||||
Utf32,
|
||||
}
|
||||
|
||||
/// Line/Column information in legacy encodings.
|
||||
///
|
||||
/// Deliberately not a generic type and different from `LineCol`.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct WideLineCol {
|
||||
/// Zero-based
|
||||
pub line: u32,
|
||||
/// Zero-based
|
||||
pub col: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
||||
pub(crate) struct Utf16Char {
|
||||
pub(crate) struct WideChar {
|
||||
/// Start offset of a character inside a line, zero-based
|
||||
pub(crate) start: TextSize,
|
||||
/// End offset of a character inside a line, zero-based
|
||||
pub(crate) end: TextSize,
|
||||
}
|
||||
|
||||
impl Utf16Char {
|
||||
impl WideChar {
|
||||
/// Returns the length in 8-bit UTF-8 code units.
|
||||
fn len(&self) -> TextSize {
|
||||
self.end - self.start
|
||||
}
|
||||
|
||||
/// Returns the length in 16-bit UTF-16 code units.
|
||||
fn len_utf16(&self) -> usize {
|
||||
if self.len() == TextSize::from(4) {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
/// Returns the length in UTF-16 or UTF-32 code units.
|
||||
fn wide_len(&self, enc: WideEncoding) -> usize {
|
||||
match enc {
|
||||
WideEncoding::Utf16 => {
|
||||
if self.len() == TextSize::from(4) {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
|
||||
WideEncoding::Utf32 => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LineIndex {
|
||||
pub fn new(text: &str) -> LineIndex {
|
||||
let mut utf16_lines = NoHashHashMap::default();
|
||||
let mut utf16_chars = Vec::new();
|
||||
let mut line_wide_chars = NoHashHashMap::default();
|
||||
let mut wide_chars = Vec::new();
|
||||
|
||||
let mut newlines = Vec::with_capacity(16);
|
||||
newlines.push(TextSize::from(0));
|
||||
|
@ -71,8 +87,8 @@ impl LineIndex {
|
|||
newlines.push(curr_row);
|
||||
|
||||
// Save any utf-16 characters seen in the previous line
|
||||
if !utf16_chars.is_empty() {
|
||||
utf16_lines.insert(line, mem::take(&mut utf16_chars));
|
||||
if !wide_chars.is_empty() {
|
||||
line_wide_chars.insert(line, mem::take(&mut wide_chars));
|
||||
}
|
||||
|
||||
// Prepare for processing the next line
|
||||
|
@ -82,18 +98,18 @@ impl LineIndex {
|
|||
}
|
||||
|
||||
if !c.is_ascii() {
|
||||
utf16_chars.push(Utf16Char { start: curr_col, end: curr_col + c_len });
|
||||
wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len });
|
||||
}
|
||||
|
||||
curr_col += c_len;
|
||||
}
|
||||
|
||||
// Save any utf-16 characters seen in the last line
|
||||
if !utf16_chars.is_empty() {
|
||||
utf16_lines.insert(line, utf16_chars);
|
||||
if !wide_chars.is_empty() {
|
||||
line_wide_chars.insert(line, wide_chars);
|
||||
}
|
||||
|
||||
LineIndex { newlines, utf16_lines }
|
||||
LineIndex { newlines, line_wide_chars }
|
||||
}
|
||||
|
||||
pub fn line_col(&self, offset: TextSize) -> LineCol {
|
||||
|
@ -109,13 +125,13 @@ impl LineIndex {
|
|||
.map(|offset| offset + TextSize::from(line_col.col))
|
||||
}
|
||||
|
||||
pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 {
|
||||
let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
|
||||
LineColUtf16 { line: line_col.line, col: col as u32 }
|
||||
pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {
|
||||
let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());
|
||||
WideLineCol { line: line_col.line, col: col as u32 }
|
||||
}
|
||||
|
||||
pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol {
|
||||
let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
|
||||
pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {
|
||||
let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);
|
||||
LineCol { line: line_col.line, col: col.into() }
|
||||
}
|
||||
|
||||
|
@ -132,12 +148,12 @@ impl LineIndex {
|
|||
.filter(|it| !it.is_empty())
|
||||
}
|
||||
|
||||
fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
|
||||
fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize {
|
||||
let mut res: usize = col.into();
|
||||
if let Some(utf16_chars) = self.utf16_lines.get(&line) {
|
||||
for c in utf16_chars {
|
||||
if let Some(wide_chars) = self.line_wide_chars.get(&line) {
|
||||
for c in wide_chars {
|
||||
if c.end <= col {
|
||||
res -= usize::from(c.len()) - c.len_utf16();
|
||||
res -= usize::from(c.len()) - c.wide_len(enc);
|
||||
} else {
|
||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||
// so we don't need to take them into account
|
||||
|
@ -148,11 +164,11 @@ impl LineIndex {
|
|||
res
|
||||
}
|
||||
|
||||
fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
|
||||
if let Some(utf16_chars) = self.utf16_lines.get(&line) {
|
||||
for c in utf16_chars {
|
||||
fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize {
|
||||
if let Some(wide_chars) = self.line_wide_chars.get(&line) {
|
||||
for c in wide_chars {
|
||||
if col > u32::from(c.start) {
|
||||
col += u32::from(c.len()) - c.len_utf16() as u32;
|
||||
col += u32::from(c.len()) - c.wide_len(enc) as u32;
|
||||
} else {
|
||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||
// so we don't need to take them into account
|
||||
|
@ -167,6 +183,9 @@ impl LineIndex {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use test_utils::skip_slow_tests;
|
||||
|
||||
use super::WideEncoding::{Utf16, Utf32};
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
|
@ -210,67 +229,59 @@ mod tests {
|
|||
const C: char = 'x';
|
||||
",
|
||||
);
|
||||
assert_eq!(col_index.utf16_lines.len(), 0);
|
||||
assert_eq!(col_index.line_wide_chars.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_char() {
|
||||
let col_index = LineIndex::new(
|
||||
"
|
||||
const C: char = 'メ';
|
||||
",
|
||||
);
|
||||
fn test_every_chars() {
|
||||
if skip_slow_tests() {
|
||||
return;
|
||||
}
|
||||
|
||||
assert_eq!(col_index.utf16_lines.len(), 1);
|
||||
assert_eq!(col_index.utf16_lines[&1].len(), 1);
|
||||
assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
|
||||
let text: String = {
|
||||
let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
|
||||
chars.extend("\n".repeat(chars.len() / 16).chars());
|
||||
let mut rng = oorandom::Rand32::new(stdx::rand::seed());
|
||||
stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
|
||||
chars.into_iter().collect()
|
||||
};
|
||||
assert!(text.contains('💩')); // Sanity check.
|
||||
|
||||
// UTF-8 to UTF-16, no changes
|
||||
assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
|
||||
let line_index = LineIndex::new(&text);
|
||||
|
||||
// UTF-8 to UTF-16
|
||||
assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);
|
||||
let mut lin_col = LineCol { line: 0, col: 0 };
|
||||
let mut col_utf16 = 0;
|
||||
let mut col_utf32 = 0;
|
||||
for (offset, c) in text.char_indices() {
|
||||
let got_offset = line_index.offset(lin_col).unwrap();
|
||||
assert_eq!(usize::from(got_offset), offset);
|
||||
|
||||
// UTF-16 to UTF-8, no changes
|
||||
assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
|
||||
let got_lin_col = line_index.line_col(got_offset);
|
||||
assert_eq!(got_lin_col, lin_col);
|
||||
|
||||
// UTF-16 to UTF-8
|
||||
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21));
|
||||
for enc in [Utf16, Utf32] {
|
||||
let wide_lin_col = line_index.to_wide(enc, lin_col);
|
||||
let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
|
||||
assert_eq!(got_lin_col, lin_col);
|
||||
|
||||
let col_index = LineIndex::new("a𐐏b");
|
||||
assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5));
|
||||
}
|
||||
let want_col = match enc {
|
||||
Utf16 => col_utf16,
|
||||
Utf32 => col_utf32,
|
||||
};
|
||||
assert_eq!(wide_lin_col.col, want_col)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string() {
|
||||
let col_index = LineIndex::new(
|
||||
"
|
||||
const C: char = \"メ メ\";
|
||||
",
|
||||
);
|
||||
|
||||
assert_eq!(col_index.utf16_lines.len(), 1);
|
||||
assert_eq!(col_index.utf16_lines[&1].len(), 2);
|
||||
assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
|
||||
assert_eq!(col_index.utf16_lines[&1][1], Utf16Char { start: 21.into(), end: 24.into() });
|
||||
|
||||
// UTF-8 to UTF-16
|
||||
assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
|
||||
|
||||
assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19);
|
||||
assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21);
|
||||
|
||||
assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15);
|
||||
|
||||
// UTF-16 to UTF-8
|
||||
assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
|
||||
|
||||
// メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
|
||||
assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
|
||||
assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
|
||||
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24
|
||||
|
||||
assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
|
||||
if c == '\n' {
|
||||
lin_col.line += 1;
|
||||
lin_col.col = 0;
|
||||
col_utf16 = 0;
|
||||
col_utf32 = 0;
|
||||
} else {
|
||||
lin_col.col += c.len_utf8() as u32;
|
||||
col_utf16 += c.len_utf16() as u32;
|
||||
col_utf32 += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -115,7 +115,7 @@ pub use ide_db::{
|
|||
SourceRoot, SourceRootId,
|
||||
},
|
||||
label::Label,
|
||||
line_index::{LineCol, LineColUtf16, LineIndex},
|
||||
line_index::{LineCol, LineIndex},
|
||||
search::{ReferenceCategory, SearchScope},
|
||||
source_change::{FileSystemEdit, SourceChange},
|
||||
symbol_index::Query,
|
||||
|
|
|
@ -18,7 +18,9 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) {
|
|||
let crate_graph = db.crate_graph();
|
||||
|
||||
let mut shuffled_ids = crate_graph.iter().collect::<Vec<_>>();
|
||||
shuffle(&mut shuffled_ids);
|
||||
|
||||
let mut rng = oorandom::Rand32::new(stdx::rand::seed());
|
||||
stdx::rand::shuffle(&mut shuffled_ids, |i| rng.rand_range(0..i as u32) as usize);
|
||||
|
||||
let mut new_graph = CrateGraph::default();
|
||||
|
||||
|
@ -52,21 +54,3 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) {
|
|||
|
||||
db.set_crate_graph_with_durability(Arc::new(new_graph), Durability::HIGH);
|
||||
}
|
||||
|
||||
fn shuffle<T>(slice: &mut [T]) {
|
||||
let mut rng = oorandom::Rand32::new(seed());
|
||||
|
||||
let mut remaining = slice.len() - 1;
|
||||
while remaining > 0 {
|
||||
let index = rng.rand_range(0..remaining as u32);
|
||||
slice.swap(remaining, index as usize);
|
||||
remaining -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn seed() -> u64 {
|
||||
use std::collections::hash_map::RandomState;
|
||||
use std::hash::{BuildHasher, Hasher};
|
||||
|
||||
RandomState::new().build_hasher().finish()
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
//! Advertises the capabilities of the LSP Server.
|
||||
use ide_db::line_index::WideEncoding;
|
||||
use lsp_types::{
|
||||
CallHierarchyServerCapability, ClientCapabilities, CodeActionKind, CodeActionOptions,
|
||||
CodeActionProviderCapability, CodeLensOptions, CompletionOptions,
|
||||
|
@ -16,16 +17,19 @@ use lsp_types::{
|
|||
use serde_json::json;
|
||||
|
||||
use crate::config::{Config, RustfmtConfig};
|
||||
use crate::lsp_ext::supports_utf8;
|
||||
use crate::line_index::PositionEncoding;
|
||||
use crate::lsp_ext::negotiated_encoding;
|
||||
use crate::semantic_tokens;
|
||||
|
||||
pub fn server_capabilities(config: &Config) -> ServerCapabilities {
|
||||
ServerCapabilities {
|
||||
position_encoding: if supports_utf8(config.caps()) {
|
||||
Some(PositionEncodingKind::UTF8)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
position_encoding: Some(match negotiated_encoding(config.caps()) {
|
||||
PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
|
||||
PositionEncoding::Wide(wide) => match wide {
|
||||
WideEncoding::Utf16 => PositionEncodingKind::UTF16,
|
||||
WideEncoding::Utf32 => PositionEncodingKind::UTF32,
|
||||
},
|
||||
}),
|
||||
text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions {
|
||||
open_close: Some(true),
|
||||
change: Some(TextDocumentSyncKind::INCREMENTAL),
|
||||
|
|
|
@ -11,6 +11,7 @@ use ide::{
|
|||
use ide_db::LineIndexDatabase;
|
||||
|
||||
use ide_db::base_db::salsa::{self, ParallelDatabase};
|
||||
use ide_db::line_index::WideEncoding;
|
||||
use lsp_types::{self, lsif};
|
||||
use project_model::{CargoConfig, ProjectManifest, ProjectWorkspace};
|
||||
use vfs::{AbsPathBuf, Vfs};
|
||||
|
@ -127,7 +128,7 @@ impl LsifManager<'_> {
|
|||
let line_index = self.db.line_index(file_id);
|
||||
let line_index = LineIndex {
|
||||
index: line_index,
|
||||
encoding: PositionEncoding::Utf16,
|
||||
encoding: PositionEncoding::Wide(WideEncoding::Utf16),
|
||||
endings: LineEndings::Unix,
|
||||
};
|
||||
let range_id = self.add_vertex(lsif::Vertex::Range {
|
||||
|
@ -249,7 +250,7 @@ impl LsifManager<'_> {
|
|||
let line_index = self.db.line_index(file_id);
|
||||
let line_index = LineIndex {
|
||||
index: line_index,
|
||||
encoding: PositionEncoding::Utf16,
|
||||
encoding: PositionEncoding::Wide(WideEncoding::Utf16),
|
||||
endings: LineEndings::Unix,
|
||||
};
|
||||
let result = folds
|
||||
|
|
|
@ -33,7 +33,7 @@ use crate::{
|
|||
caps::completion_item_edit_resolve,
|
||||
diagnostics::DiagnosticsMapConfig,
|
||||
line_index::PositionEncoding,
|
||||
lsp_ext::{self, supports_utf8, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope},
|
||||
lsp_ext::{self, negotiated_encoding, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope},
|
||||
};
|
||||
|
||||
mod patch_old_style;
|
||||
|
@ -999,11 +999,7 @@ impl Config {
|
|||
}
|
||||
|
||||
pub fn position_encoding(&self) -> PositionEncoding {
|
||||
if supports_utf8(&self.caps) {
|
||||
PositionEncoding::Utf8
|
||||
} else {
|
||||
PositionEncoding::Utf16
|
||||
}
|
||||
negotiated_encoding(&self.caps)
|
||||
}
|
||||
|
||||
fn experimental(&self, index: &'static str) -> bool {
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan};
|
||||
use ide_db::line_index::WideEncoding;
|
||||
use itertools::Itertools;
|
||||
use stdx::format_to;
|
||||
use vfs::{AbsPath, AbsPathBuf};
|
||||
|
@ -95,7 +96,8 @@ fn position(
|
|||
let mut char_offset = 0;
|
||||
let len_func = match position_encoding {
|
||||
PositionEncoding::Utf8 => char::len_utf8,
|
||||
PositionEncoding::Utf16 => char::len_utf16,
|
||||
PositionEncoding::Wide(WideEncoding::Utf16) => char::len_utf16,
|
||||
PositionEncoding::Wide(WideEncoding::Utf32) => |_| 1,
|
||||
};
|
||||
for c in line.text.chars() {
|
||||
char_offset += 1;
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
//! Conversion lsp_types types to rust-analyzer specific ones.
|
||||
use anyhow::format_err;
|
||||
use ide::{Annotation, AnnotationKind, AssistKind, LineCol, LineColUtf16};
|
||||
use ide_db::base_db::{FileId, FilePosition, FileRange};
|
||||
use ide::{Annotation, AnnotationKind, AssistKind, LineCol};
|
||||
use ide_db::{
|
||||
base_db::{FileId, FilePosition, FileRange},
|
||||
line_index::WideLineCol,
|
||||
};
|
||||
use syntax::{TextRange, TextSize};
|
||||
use vfs::AbsPathBuf;
|
||||
|
||||
|
@ -26,9 +29,9 @@ pub(crate) fn vfs_path(url: &lsp_types::Url) -> Result<vfs::VfsPath> {
|
|||
pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> Result<TextSize> {
|
||||
let line_col = match line_index.encoding {
|
||||
PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character },
|
||||
PositionEncoding::Utf16 => {
|
||||
let line_col = LineColUtf16 { line: position.line, col: position.character };
|
||||
line_index.index.to_utf8(line_col)
|
||||
PositionEncoding::Wide(enc) => {
|
||||
let line_col = WideLineCol { line: position.line, col: position.character };
|
||||
line_index.index.to_utf8(enc, line_col)
|
||||
}
|
||||
};
|
||||
let text_size =
|
||||
|
|
|
@ -7,9 +7,12 @@
|
|||
|
||||
use std::sync::Arc;
|
||||
|
||||
use ide_db::line_index::WideEncoding;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum PositionEncoding {
|
||||
Utf8,
|
||||
Utf16,
|
||||
Wide(WideEncoding),
|
||||
}
|
||||
|
||||
pub(crate) struct LineIndex {
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
use std::{collections::HashMap, path::PathBuf};
|
||||
|
||||
use ide_db::line_index::WideEncoding;
|
||||
use lsp_types::request::Request;
|
||||
use lsp_types::PositionEncodingKind;
|
||||
use lsp_types::{
|
||||
|
@ -10,6 +11,8 @@ use lsp_types::{
|
|||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::line_index::PositionEncoding;
|
||||
|
||||
pub enum AnalyzerStatus {}
|
||||
|
||||
impl Request for AnalyzerStatus {
|
||||
|
@ -481,16 +484,22 @@ pub(crate) enum CodeLensResolveData {
|
|||
References(lsp_types::TextDocumentPositionParams),
|
||||
}
|
||||
|
||||
pub fn supports_utf8(caps: &lsp_types::ClientCapabilities) -> bool {
|
||||
match &caps.general {
|
||||
Some(general) => general
|
||||
.position_encodings
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.iter()
|
||||
.any(|it| it == &PositionEncodingKind::UTF8),
|
||||
_ => false,
|
||||
pub fn negotiated_encoding(caps: &lsp_types::ClientCapabilities) -> PositionEncoding {
|
||||
let client_encodings = match &caps.general {
|
||||
Some(general) => general.position_encodings.as_deref().unwrap_or_default(),
|
||||
None => &[],
|
||||
};
|
||||
|
||||
for enc in client_encodings {
|
||||
if enc == &PositionEncodingKind::UTF8 {
|
||||
return PositionEncoding::Utf8;
|
||||
} else if enc == &PositionEncodingKind::UTF32 {
|
||||
return PositionEncoding::Wide(WideEncoding::Utf32);
|
||||
}
|
||||
// NB: intentionally prefer just about anything else to utf-16.
|
||||
}
|
||||
|
||||
PositionEncoding::Wide(WideEncoding::Utf16)
|
||||
}
|
||||
|
||||
pub enum MoveItem {}
|
||||
|
|
|
@ -161,6 +161,7 @@ impl GlobalState {
|
|||
}
|
||||
|
||||
pub(crate) fn apply_document_changes(
|
||||
encoding: PositionEncoding,
|
||||
file_contents: impl FnOnce() -> String,
|
||||
mut content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>,
|
||||
) -> String {
|
||||
|
@ -192,9 +193,9 @@ pub(crate) fn apply_document_changes(
|
|||
let mut line_index = LineIndex {
|
||||
// the index will be overwritten in the bottom loop's first iteration
|
||||
index: Arc::new(ide::LineIndex::new(&text)),
|
||||
// We don't care about line endings or offset encoding here.
|
||||
// We don't care about line endings here.
|
||||
endings: LineEndings::Unix,
|
||||
encoding: PositionEncoding::Utf16,
|
||||
encoding,
|
||||
};
|
||||
|
||||
// The changes we got must be applied sequentially, but can cross lines so we
|
||||
|
@ -256,6 +257,7 @@ pub(crate) fn all_edits_are_disjoint(
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ide_db::line_index::WideEncoding;
|
||||
use lsp_types::{
|
||||
CompletionItem, CompletionTextEdit, InsertReplaceEdit, Position, Range,
|
||||
TextDocumentContentChangeEvent,
|
||||
|
@ -278,9 +280,11 @@ mod tests {
|
|||
};
|
||||
}
|
||||
|
||||
let text = apply_document_changes(|| String::new(), vec![]);
|
||||
let encoding = PositionEncoding::Wide(WideEncoding::Utf16);
|
||||
let text = apply_document_changes(encoding, || String::new(), vec![]);
|
||||
assert_eq!(text, "");
|
||||
let text = apply_document_changes(
|
||||
encoding,
|
||||
|| text,
|
||||
vec![TextDocumentContentChangeEvent {
|
||||
range: None,
|
||||
|
@ -289,39 +293,49 @@ mod tests {
|
|||
}],
|
||||
);
|
||||
assert_eq!(text, "the");
|
||||
let text = apply_document_changes(|| text, c![0, 3; 0, 3 => " quick"]);
|
||||
let text = apply_document_changes(encoding, || text, c![0, 3; 0, 3 => " quick"]);
|
||||
assert_eq!(text, "the quick");
|
||||
let text = apply_document_changes(|| text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
|
||||
let text =
|
||||
apply_document_changes(encoding, || text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
|
||||
assert_eq!(text, "quick foxes");
|
||||
let text = apply_document_changes(|| text, c![0, 11; 0, 11 => "\ndream"]);
|
||||
let text = apply_document_changes(encoding, || text, c![0, 11; 0, 11 => "\ndream"]);
|
||||
assert_eq!(text, "quick foxes\ndream");
|
||||
let text = apply_document_changes(|| text, c![1, 0; 1, 0 => "have "]);
|
||||
let text = apply_document_changes(encoding, || text, c![1, 0; 1, 0 => "have "]);
|
||||
assert_eq!(text, "quick foxes\nhave dream");
|
||||
let text = apply_document_changes(
|
||||
encoding,
|
||||
|| text,
|
||||
c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"],
|
||||
);
|
||||
assert_eq!(text, "the quick foxes\nhave quiet dreams\n");
|
||||
let text = apply_document_changes(|| text, c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"]);
|
||||
let text = apply_document_changes(
|
||||
encoding,
|
||||
|| text,
|
||||
c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"],
|
||||
);
|
||||
assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n");
|
||||
let text = apply_document_changes(
|
||||
encoding,
|
||||
|| text,
|
||||
c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"],
|
||||
);
|
||||
assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n");
|
||||
let text = apply_document_changes(|| text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
|
||||
let text =
|
||||
apply_document_changes(encoding, || text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
|
||||
assert_eq!(text, "the quick \nthey have quiet dreams\n");
|
||||
|
||||
let text = String::from("❤️");
|
||||
let text = apply_document_changes(|| text, c![0, 0; 0, 0 => "a"]);
|
||||
let text = apply_document_changes(encoding, || text, c![0, 0; 0, 0 => "a"]);
|
||||
assert_eq!(text, "a❤️");
|
||||
|
||||
let text = String::from("a\nb");
|
||||
let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
|
||||
let text =
|
||||
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
|
||||
assert_eq!(text, "adcb");
|
||||
|
||||
let text = String::from("a\nb");
|
||||
let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
|
||||
let text =
|
||||
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
|
||||
assert_eq!(text, "ațc\ncb");
|
||||
}
|
||||
|
||||
|
|
|
@ -831,6 +831,7 @@ impl GlobalState {
|
|||
let vfs = &mut this.vfs.write().0;
|
||||
let file_id = vfs.file_id(&path).unwrap();
|
||||
let text = apply_document_changes(
|
||||
this.config.position_encoding(),
|
||||
|| std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into(),
|
||||
params.content_changes,
|
||||
);
|
||||
|
|
|
@ -31,8 +31,8 @@ pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp_types::P
|
|||
let line_col = line_index.index.line_col(offset);
|
||||
match line_index.encoding {
|
||||
PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col),
|
||||
PositionEncoding::Utf16 => {
|
||||
let line_col = line_index.index.to_utf16(line_col);
|
||||
PositionEncoding::Wide(enc) => {
|
||||
let line_col = line_index.index.to_wide(enc, line_col);
|
||||
lsp_types::Position::new(line_col.line, line_col.col)
|
||||
}
|
||||
}
|
||||
|
@ -1429,7 +1429,7 @@ fn main() {
|
|||
let line_index = LineIndex {
|
||||
index: Arc::new(ide::LineIndex::new(text)),
|
||||
endings: LineEndings::Unix,
|
||||
encoding: PositionEncoding::Utf16,
|
||||
encoding: PositionEncoding::Utf8,
|
||||
};
|
||||
let converted: Vec<lsp_types::FoldingRange> =
|
||||
folds.into_iter().map(|it| folding_range(text, &line_index, true, it)).collect();
|
||||
|
|
|
@ -11,6 +11,7 @@ pub mod hash;
|
|||
pub mod process;
|
||||
pub mod panic_context;
|
||||
pub mod non_empty_vec;
|
||||
pub mod rand;
|
||||
|
||||
pub use always_assert::{always, never};
|
||||
|
||||
|
|
21
crates/stdx/src/rand.rs
Normal file
21
crates/stdx/src/rand.rs
Normal file
|
@ -0,0 +1,21 @@
|
|||
//! We don't use `rand`, as that's too many things for us.
|
||||
//!
|
||||
//! Currently, we use oorandom instead, but it misses these two utilities.
|
||||
//! Perhaps we should switch to `fastrand`, or our own small prng, it's not like
|
||||
//! we need anything more complicated than xor-shift.
|
||||
|
||||
/// Shuffles `slice` in place, drawing swap positions from `rand_index`.
///
/// Walks the slice from its last element down to index 1. For each position
/// `i`, calls `rand_index(i)` and swaps the element at `i` with the element
/// at the returned index. Slices with fewer than two elements are left
/// untouched, and `rand_index` is never invoked for them.
///
/// # Panics
///
/// Panics if `rand_index` returns an index that is out of bounds for `slice`.
pub fn shuffle<T>(slice: &mut [T], mut rand_index: impl FnMut(usize) -> usize) {
    // Starting the range at 1 makes the empty slice a no-op; computing
    // `slice.len() - 1` up front would underflow when the slice is empty.
    for remaining in (1..slice.len()).rev() {
        let index = rand_index(remaining);
        slice.swap(remaining, index);
    }
}
|
||||
|
||||
/// Produces a `u64` seed value without pulling in a random-number crate.
///
/// The value is the `finish()` output of a hasher freshly built from
/// `std`'s `RandomState`, with nothing written into it.
pub fn seed() -> u64 {
    use std::{
        collections::hash_map::RandomState,
        hash::{BuildHasher, Hasher},
    };

    let hasher = RandomState::new().build_hasher();
    hasher.finish()
}
|
|
@ -1,5 +1,5 @@
|
|||
<!---
|
||||
lsp_ext.rs hash: ec29403e67dfd15b
|
||||
lsp_ext.rs hash: d87477896dfe41d4
|
||||
|
||||
If you need to change the above hash to make the test pass, please check if you
|
||||
need to adjust this doc as well and ping this issue:
|
||||
|
|
Loading…
Reference in a new issue