mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-04 01:08:47 +00:00
Auto merge of #14141 - matklad:utf-32, r=lnicola
Support UTF-32 position encoding. Looks like this is a native encoding for Emacs at least!
This commit is contained in:
commit
31486a639d
18 changed files with 210 additions and 158 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -711,6 +711,7 @@ dependencies = [
|
||||||
"limit",
|
"limit",
|
||||||
"memchr",
|
"memchr",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
"oorandom",
|
||||||
"parser",
|
"parser",
|
||||||
"profile",
|
"profile",
|
||||||
"rayon",
|
"rayon",
|
||||||
|
|
|
@ -37,8 +37,9 @@ text-edit.workspace = true
|
||||||
hir.workspace = true
|
hir.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
xshell = "0.2.2"
|
|
||||||
expect-test = "1.4.0"
|
expect-test = "1.4.0"
|
||||||
|
oorandom = "11.1.3"
|
||||||
|
xshell = "0.2.2"
|
||||||
|
|
||||||
# local deps
|
# local deps
|
||||||
test-utils.workspace = true
|
test-utils.workspace = true
|
||||||
|
|
|
@ -7,20 +7,13 @@ use syntax::{TextRange, TextSize};
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct LineIndex {
|
pub struct LineIndex {
|
||||||
/// Offset the the beginning of each line, zero-based
|
/// Offset of the beginning of each line, zero-based.
|
||||||
pub(crate) newlines: Vec<TextSize>,
|
pub(crate) newlines: Vec<TextSize>,
|
||||||
/// List of non-ASCII characters on each line
|
/// List of non-ASCII characters on each line.
|
||||||
pub(crate) utf16_lines: NoHashHashMap<u32, Vec<Utf16Char>>,
|
pub(crate) line_wide_chars: NoHashHashMap<u32, Vec<WideChar>>,
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
|
||||||
pub struct LineColUtf16 {
|
|
||||||
/// Zero-based
|
|
||||||
pub line: u32,
|
|
||||||
/// Zero-based
|
|
||||||
pub col: u32,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Line/Column information in native, utf8 format.
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||||
pub struct LineCol {
|
pub struct LineCol {
|
||||||
/// Zero-based
|
/// Zero-based
|
||||||
|
@ -29,34 +22,57 @@ pub struct LineCol {
|
||||||
pub col: u32,
|
pub col: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||||
|
pub enum WideEncoding {
|
||||||
|
Utf16,
|
||||||
|
Utf32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Line/Column information in legacy encodings.
|
||||||
|
///
|
||||||
|
/// Deliberately not a generic type and different from `LineCol`.
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||||
|
pub struct WideLineCol {
|
||||||
|
/// Zero-based
|
||||||
|
pub line: u32,
|
||||||
|
/// Zero-based
|
||||||
|
pub col: u32,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
||||||
pub(crate) struct Utf16Char {
|
pub(crate) struct WideChar {
|
||||||
/// Start offset of a character inside a line, zero-based
|
/// Start offset of a character inside a line, zero-based
|
||||||
pub(crate) start: TextSize,
|
pub(crate) start: TextSize,
|
||||||
/// End offset of a character inside a line, zero-based
|
/// End offset of a character inside a line, zero-based
|
||||||
pub(crate) end: TextSize,
|
pub(crate) end: TextSize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Utf16Char {
|
impl WideChar {
|
||||||
/// Returns the length in 8-bit UTF-8 code units.
|
/// Returns the length in 8-bit UTF-8 code units.
|
||||||
fn len(&self) -> TextSize {
|
fn len(&self) -> TextSize {
|
||||||
self.end - self.start
|
self.end - self.start
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the length in 16-bit UTF-16 code units.
|
/// Returns the length in UTF-16 or UTF-32 code units.
|
||||||
fn len_utf16(&self) -> usize {
|
fn wide_len(&self, enc: WideEncoding) -> usize {
|
||||||
|
match enc {
|
||||||
|
WideEncoding::Utf16 => {
|
||||||
if self.len() == TextSize::from(4) {
|
if self.len() == TextSize::from(4) {
|
||||||
2
|
2
|
||||||
} else {
|
} else {
|
||||||
1
|
1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
WideEncoding::Utf32 => 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LineIndex {
|
impl LineIndex {
|
||||||
pub fn new(text: &str) -> LineIndex {
|
pub fn new(text: &str) -> LineIndex {
|
||||||
let mut utf16_lines = NoHashHashMap::default();
|
let mut line_wide_chars = NoHashHashMap::default();
|
||||||
let mut utf16_chars = Vec::new();
|
let mut wide_chars = Vec::new();
|
||||||
|
|
||||||
let mut newlines = Vec::with_capacity(16);
|
let mut newlines = Vec::with_capacity(16);
|
||||||
newlines.push(TextSize::from(0));
|
newlines.push(TextSize::from(0));
|
||||||
|
@ -71,8 +87,8 @@ impl LineIndex {
|
||||||
newlines.push(curr_row);
|
newlines.push(curr_row);
|
||||||
|
|
||||||
// Save any utf-16 characters seen in the previous line
|
// Save any utf-16 characters seen in the previous line
|
||||||
if !utf16_chars.is_empty() {
|
if !wide_chars.is_empty() {
|
||||||
utf16_lines.insert(line, mem::take(&mut utf16_chars));
|
line_wide_chars.insert(line, mem::take(&mut wide_chars));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare for processing the next line
|
// Prepare for processing the next line
|
||||||
|
@ -82,18 +98,18 @@ impl LineIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
if !c.is_ascii() {
|
if !c.is_ascii() {
|
||||||
utf16_chars.push(Utf16Char { start: curr_col, end: curr_col + c_len });
|
wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len });
|
||||||
}
|
}
|
||||||
|
|
||||||
curr_col += c_len;
|
curr_col += c_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save any utf-16 characters seen in the last line
|
// Save any utf-16 characters seen in the last line
|
||||||
if !utf16_chars.is_empty() {
|
if !wide_chars.is_empty() {
|
||||||
utf16_lines.insert(line, utf16_chars);
|
line_wide_chars.insert(line, wide_chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
LineIndex { newlines, utf16_lines }
|
LineIndex { newlines, line_wide_chars }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn line_col(&self, offset: TextSize) -> LineCol {
|
pub fn line_col(&self, offset: TextSize) -> LineCol {
|
||||||
|
@ -109,13 +125,13 @@ impl LineIndex {
|
||||||
.map(|offset| offset + TextSize::from(line_col.col))
|
.map(|offset| offset + TextSize::from(line_col.col))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 {
|
pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {
|
||||||
let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
|
let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());
|
||||||
LineColUtf16 { line: line_col.line, col: col as u32 }
|
WideLineCol { line: line_col.line, col: col as u32 }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol {
|
pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {
|
||||||
let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
|
let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);
|
||||||
LineCol { line: line_col.line, col: col.into() }
|
LineCol { line: line_col.line, col: col.into() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -132,12 +148,12 @@ impl LineIndex {
|
||||||
.filter(|it| !it.is_empty())
|
.filter(|it| !it.is_empty())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
|
fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize {
|
||||||
let mut res: usize = col.into();
|
let mut res: usize = col.into();
|
||||||
if let Some(utf16_chars) = self.utf16_lines.get(&line) {
|
if let Some(wide_chars) = self.line_wide_chars.get(&line) {
|
||||||
for c in utf16_chars {
|
for c in wide_chars {
|
||||||
if c.end <= col {
|
if c.end <= col {
|
||||||
res -= usize::from(c.len()) - c.len_utf16();
|
res -= usize::from(c.len()) - c.wide_len(enc);
|
||||||
} else {
|
} else {
|
||||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||||
// so we don't need to take them into account
|
// so we don't need to take them into account
|
||||||
|
@ -148,11 +164,11 @@ impl LineIndex {
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
|
fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize {
|
||||||
if let Some(utf16_chars) = self.utf16_lines.get(&line) {
|
if let Some(wide_chars) = self.line_wide_chars.get(&line) {
|
||||||
for c in utf16_chars {
|
for c in wide_chars {
|
||||||
if col > u32::from(c.start) {
|
if col > u32::from(c.start) {
|
||||||
col += u32::from(c.len()) - c.len_utf16() as u32;
|
col += u32::from(c.len()) - c.wide_len(enc) as u32;
|
||||||
} else {
|
} else {
|
||||||
// From here on, all utf16 characters come *after* the character we are mapping,
|
// From here on, all utf16 characters come *after* the character we are mapping,
|
||||||
// so we don't need to take them into account
|
// so we don't need to take them into account
|
||||||
|
@ -167,6 +183,9 @@ impl LineIndex {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use test_utils::skip_slow_tests;
|
||||||
|
|
||||||
|
use super::WideEncoding::{Utf16, Utf32};
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -210,67 +229,59 @@ mod tests {
|
||||||
const C: char = 'x';
|
const C: char = 'x';
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
assert_eq!(col_index.utf16_lines.len(), 0);
|
assert_eq!(col_index.line_wide_chars.len(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_single_char() {
|
fn test_every_chars() {
|
||||||
let col_index = LineIndex::new(
|
if skip_slow_tests() {
|
||||||
"
|
return;
|
||||||
const C: char = 'メ';
|
|
||||||
",
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(col_index.utf16_lines.len(), 1);
|
|
||||||
assert_eq!(col_index.utf16_lines[&1].len(), 1);
|
|
||||||
assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
|
|
||||||
|
|
||||||
// UTF-8 to UTF-16, no changes
|
|
||||||
assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
|
|
||||||
|
|
||||||
// UTF-8 to UTF-16
|
|
||||||
assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);
|
|
||||||
|
|
||||||
// UTF-16 to UTF-8, no changes
|
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
|
|
||||||
|
|
||||||
// UTF-16 to UTF-8
|
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21));
|
|
||||||
|
|
||||||
let col_index = LineIndex::new("a𐐏b");
|
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
let text: String = {
|
||||||
fn test_string() {
|
let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!
|
||||||
let col_index = LineIndex::new(
|
chars.extend("\n".repeat(chars.len() / 16).chars());
|
||||||
"
|
let mut rng = oorandom::Rand32::new(stdx::rand::seed());
|
||||||
const C: char = \"メ メ\";
|
stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);
|
||||||
",
|
chars.into_iter().collect()
|
||||||
);
|
};
|
||||||
|
assert!(text.contains('💩')); // Sanity check.
|
||||||
|
|
||||||
assert_eq!(col_index.utf16_lines.len(), 1);
|
let line_index = LineIndex::new(&text);
|
||||||
assert_eq!(col_index.utf16_lines[&1].len(), 2);
|
|
||||||
assert_eq!(col_index.utf16_lines[&1][0], Utf16Char { start: 17.into(), end: 20.into() });
|
|
||||||
assert_eq!(col_index.utf16_lines[&1][1], Utf16Char { start: 21.into(), end: 24.into() });
|
|
||||||
|
|
||||||
// UTF-8 to UTF-16
|
let mut lin_col = LineCol { line: 0, col: 0 };
|
||||||
assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
|
let mut col_utf16 = 0;
|
||||||
|
let mut col_utf32 = 0;
|
||||||
|
for (offset, c) in text.char_indices() {
|
||||||
|
let got_offset = line_index.offset(lin_col).unwrap();
|
||||||
|
assert_eq!(usize::from(got_offset), offset);
|
||||||
|
|
||||||
assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19);
|
let got_lin_col = line_index.line_col(got_offset);
|
||||||
assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21);
|
assert_eq!(got_lin_col, lin_col);
|
||||||
|
|
||||||
assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15);
|
for enc in [Utf16, Utf32] {
|
||||||
|
let wide_lin_col = line_index.to_wide(enc, lin_col);
|
||||||
|
let got_lin_col = line_index.to_utf8(enc, wide_lin_col);
|
||||||
|
assert_eq!(got_lin_col, lin_col);
|
||||||
|
|
||||||
// UTF-16 to UTF-8
|
let want_col = match enc {
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
|
Utf16 => col_utf16,
|
||||||
|
Utf32 => col_utf32,
|
||||||
|
};
|
||||||
|
assert_eq!(wide_lin_col.col, want_col)
|
||||||
|
}
|
||||||
|
|
||||||
// メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
|
if c == '\n' {
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
|
lin_col.line += 1;
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
|
lin_col.col = 0;
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24
|
col_utf16 = 0;
|
||||||
|
col_utf32 = 0;
|
||||||
assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
|
} else {
|
||||||
|
lin_col.col += c.len_utf8() as u32;
|
||||||
|
col_utf16 += c.len_utf16() as u32;
|
||||||
|
col_utf32 += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -115,7 +115,7 @@ pub use ide_db::{
|
||||||
SourceRoot, SourceRootId,
|
SourceRoot, SourceRootId,
|
||||||
},
|
},
|
||||||
label::Label,
|
label::Label,
|
||||||
line_index::{LineCol, LineColUtf16, LineIndex},
|
line_index::{LineCol, LineIndex},
|
||||||
search::{ReferenceCategory, SearchScope},
|
search::{ReferenceCategory, SearchScope},
|
||||||
source_change::{FileSystemEdit, SourceChange},
|
source_change::{FileSystemEdit, SourceChange},
|
||||||
symbol_index::Query,
|
symbol_index::Query,
|
||||||
|
|
|
@ -18,7 +18,9 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) {
|
||||||
let crate_graph = db.crate_graph();
|
let crate_graph = db.crate_graph();
|
||||||
|
|
||||||
let mut shuffled_ids = crate_graph.iter().collect::<Vec<_>>();
|
let mut shuffled_ids = crate_graph.iter().collect::<Vec<_>>();
|
||||||
shuffle(&mut shuffled_ids);
|
|
||||||
|
let mut rng = oorandom::Rand32::new(stdx::rand::seed());
|
||||||
|
stdx::rand::shuffle(&mut shuffled_ids, |i| rng.rand_range(0..i as u32) as usize);
|
||||||
|
|
||||||
let mut new_graph = CrateGraph::default();
|
let mut new_graph = CrateGraph::default();
|
||||||
|
|
||||||
|
@ -52,21 +54,3 @@ pub(crate) fn shuffle_crate_graph(db: &mut RootDatabase) {
|
||||||
|
|
||||||
db.set_crate_graph_with_durability(Arc::new(new_graph), Durability::HIGH);
|
db.set_crate_graph_with_durability(Arc::new(new_graph), Durability::HIGH);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn shuffle<T>(slice: &mut [T]) {
|
|
||||||
let mut rng = oorandom::Rand32::new(seed());
|
|
||||||
|
|
||||||
let mut remaining = slice.len() - 1;
|
|
||||||
while remaining > 0 {
|
|
||||||
let index = rng.rand_range(0..remaining as u32);
|
|
||||||
slice.swap(remaining, index as usize);
|
|
||||||
remaining -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn seed() -> u64 {
|
|
||||||
use std::collections::hash_map::RandomState;
|
|
||||||
use std::hash::{BuildHasher, Hasher};
|
|
||||||
|
|
||||||
RandomState::new().build_hasher().finish()
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
//! Advertises the capabilities of the LSP Server.
|
//! Advertises the capabilities of the LSP Server.
|
||||||
|
use ide_db::line_index::WideEncoding;
|
||||||
use lsp_types::{
|
use lsp_types::{
|
||||||
CallHierarchyServerCapability, ClientCapabilities, CodeActionKind, CodeActionOptions,
|
CallHierarchyServerCapability, ClientCapabilities, CodeActionKind, CodeActionOptions,
|
||||||
CodeActionProviderCapability, CodeLensOptions, CompletionOptions,
|
CodeActionProviderCapability, CodeLensOptions, CompletionOptions,
|
||||||
|
@ -16,16 +17,19 @@ use lsp_types::{
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use crate::config::{Config, RustfmtConfig};
|
use crate::config::{Config, RustfmtConfig};
|
||||||
use crate::lsp_ext::supports_utf8;
|
use crate::line_index::PositionEncoding;
|
||||||
|
use crate::lsp_ext::negotiated_encoding;
|
||||||
use crate::semantic_tokens;
|
use crate::semantic_tokens;
|
||||||
|
|
||||||
pub fn server_capabilities(config: &Config) -> ServerCapabilities {
|
pub fn server_capabilities(config: &Config) -> ServerCapabilities {
|
||||||
ServerCapabilities {
|
ServerCapabilities {
|
||||||
position_encoding: if supports_utf8(config.caps()) {
|
position_encoding: Some(match negotiated_encoding(config.caps()) {
|
||||||
Some(PositionEncodingKind::UTF8)
|
PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
|
||||||
} else {
|
PositionEncoding::Wide(wide) => match wide {
|
||||||
None
|
WideEncoding::Utf16 => PositionEncodingKind::UTF16,
|
||||||
|
WideEncoding::Utf32 => PositionEncodingKind::UTF32,
|
||||||
},
|
},
|
||||||
|
}),
|
||||||
text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions {
|
text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions {
|
||||||
open_close: Some(true),
|
open_close: Some(true),
|
||||||
change: Some(TextDocumentSyncKind::INCREMENTAL),
|
change: Some(TextDocumentSyncKind::INCREMENTAL),
|
||||||
|
|
|
@ -11,6 +11,7 @@ use ide::{
|
||||||
use ide_db::LineIndexDatabase;
|
use ide_db::LineIndexDatabase;
|
||||||
|
|
||||||
use ide_db::base_db::salsa::{self, ParallelDatabase};
|
use ide_db::base_db::salsa::{self, ParallelDatabase};
|
||||||
|
use ide_db::line_index::WideEncoding;
|
||||||
use lsp_types::{self, lsif};
|
use lsp_types::{self, lsif};
|
||||||
use project_model::{CargoConfig, ProjectManifest, ProjectWorkspace};
|
use project_model::{CargoConfig, ProjectManifest, ProjectWorkspace};
|
||||||
use vfs::{AbsPathBuf, Vfs};
|
use vfs::{AbsPathBuf, Vfs};
|
||||||
|
@ -127,7 +128,7 @@ impl LsifManager<'_> {
|
||||||
let line_index = self.db.line_index(file_id);
|
let line_index = self.db.line_index(file_id);
|
||||||
let line_index = LineIndex {
|
let line_index = LineIndex {
|
||||||
index: line_index,
|
index: line_index,
|
||||||
encoding: PositionEncoding::Utf16,
|
encoding: PositionEncoding::Wide(WideEncoding::Utf16),
|
||||||
endings: LineEndings::Unix,
|
endings: LineEndings::Unix,
|
||||||
};
|
};
|
||||||
let range_id = self.add_vertex(lsif::Vertex::Range {
|
let range_id = self.add_vertex(lsif::Vertex::Range {
|
||||||
|
@ -249,7 +250,7 @@ impl LsifManager<'_> {
|
||||||
let line_index = self.db.line_index(file_id);
|
let line_index = self.db.line_index(file_id);
|
||||||
let line_index = LineIndex {
|
let line_index = LineIndex {
|
||||||
index: line_index,
|
index: line_index,
|
||||||
encoding: PositionEncoding::Utf16,
|
encoding: PositionEncoding::Wide(WideEncoding::Utf16),
|
||||||
endings: LineEndings::Unix,
|
endings: LineEndings::Unix,
|
||||||
};
|
};
|
||||||
let result = folds
|
let result = folds
|
||||||
|
|
|
@ -33,7 +33,7 @@ use crate::{
|
||||||
caps::completion_item_edit_resolve,
|
caps::completion_item_edit_resolve,
|
||||||
diagnostics::DiagnosticsMapConfig,
|
diagnostics::DiagnosticsMapConfig,
|
||||||
line_index::PositionEncoding,
|
line_index::PositionEncoding,
|
||||||
lsp_ext::{self, supports_utf8, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope},
|
lsp_ext::{self, negotiated_encoding, WorkspaceSymbolSearchKind, WorkspaceSymbolSearchScope},
|
||||||
};
|
};
|
||||||
|
|
||||||
mod patch_old_style;
|
mod patch_old_style;
|
||||||
|
@ -999,11 +999,7 @@ impl Config {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn position_encoding(&self) -> PositionEncoding {
|
pub fn position_encoding(&self) -> PositionEncoding {
|
||||||
if supports_utf8(&self.caps) {
|
negotiated_encoding(&self.caps)
|
||||||
PositionEncoding::Utf8
|
|
||||||
} else {
|
|
||||||
PositionEncoding::Utf16
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn experimental(&self, index: &'static str) -> bool {
|
fn experimental(&self, index: &'static str) -> bool {
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan};
|
use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan};
|
||||||
|
use ide_db::line_index::WideEncoding;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use stdx::format_to;
|
use stdx::format_to;
|
||||||
use vfs::{AbsPath, AbsPathBuf};
|
use vfs::{AbsPath, AbsPathBuf};
|
||||||
|
@ -95,7 +96,8 @@ fn position(
|
||||||
let mut char_offset = 0;
|
let mut char_offset = 0;
|
||||||
let len_func = match position_encoding {
|
let len_func = match position_encoding {
|
||||||
PositionEncoding::Utf8 => char::len_utf8,
|
PositionEncoding::Utf8 => char::len_utf8,
|
||||||
PositionEncoding::Utf16 => char::len_utf16,
|
PositionEncoding::Wide(WideEncoding::Utf16) => char::len_utf16,
|
||||||
|
PositionEncoding::Wide(WideEncoding::Utf32) => |_| 1,
|
||||||
};
|
};
|
||||||
for c in line.text.chars() {
|
for c in line.text.chars() {
|
||||||
char_offset += 1;
|
char_offset += 1;
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
//! Conversion lsp_types types to rust-analyzer specific ones.
|
//! Conversion lsp_types types to rust-analyzer specific ones.
|
||||||
use anyhow::format_err;
|
use anyhow::format_err;
|
||||||
use ide::{Annotation, AnnotationKind, AssistKind, LineCol, LineColUtf16};
|
use ide::{Annotation, AnnotationKind, AssistKind, LineCol};
|
||||||
use ide_db::base_db::{FileId, FilePosition, FileRange};
|
use ide_db::{
|
||||||
|
base_db::{FileId, FilePosition, FileRange},
|
||||||
|
line_index::WideLineCol,
|
||||||
|
};
|
||||||
use syntax::{TextRange, TextSize};
|
use syntax::{TextRange, TextSize};
|
||||||
use vfs::AbsPathBuf;
|
use vfs::AbsPathBuf;
|
||||||
|
|
||||||
|
@ -26,9 +29,9 @@ pub(crate) fn vfs_path(url: &lsp_types::Url) -> Result<vfs::VfsPath> {
|
||||||
pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> Result<TextSize> {
|
pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> Result<TextSize> {
|
||||||
let line_col = match line_index.encoding {
|
let line_col = match line_index.encoding {
|
||||||
PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character },
|
PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character },
|
||||||
PositionEncoding::Utf16 => {
|
PositionEncoding::Wide(enc) => {
|
||||||
let line_col = LineColUtf16 { line: position.line, col: position.character };
|
let line_col = WideLineCol { line: position.line, col: position.character };
|
||||||
line_index.index.to_utf8(line_col)
|
line_index.index.to_utf8(enc, line_col)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let text_size =
|
let text_size =
|
||||||
|
|
|
@ -7,9 +7,12 @@
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use ide_db::line_index::WideEncoding;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
pub enum PositionEncoding {
|
pub enum PositionEncoding {
|
||||||
Utf8,
|
Utf8,
|
||||||
Utf16,
|
Wide(WideEncoding),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct LineIndex {
|
pub(crate) struct LineIndex {
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
use std::{collections::HashMap, path::PathBuf};
|
use std::{collections::HashMap, path::PathBuf};
|
||||||
|
|
||||||
|
use ide_db::line_index::WideEncoding;
|
||||||
use lsp_types::request::Request;
|
use lsp_types::request::Request;
|
||||||
use lsp_types::PositionEncodingKind;
|
use lsp_types::PositionEncodingKind;
|
||||||
use lsp_types::{
|
use lsp_types::{
|
||||||
|
@ -10,6 +11,8 @@ use lsp_types::{
|
||||||
};
|
};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::line_index::PositionEncoding;
|
||||||
|
|
||||||
pub enum AnalyzerStatus {}
|
pub enum AnalyzerStatus {}
|
||||||
|
|
||||||
impl Request for AnalyzerStatus {
|
impl Request for AnalyzerStatus {
|
||||||
|
@ -481,16 +484,22 @@ pub(crate) enum CodeLensResolveData {
|
||||||
References(lsp_types::TextDocumentPositionParams),
|
References(lsp_types::TextDocumentPositionParams),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn supports_utf8(caps: &lsp_types::ClientCapabilities) -> bool {
|
pub fn negotiated_encoding(caps: &lsp_types::ClientCapabilities) -> PositionEncoding {
|
||||||
match &caps.general {
|
let client_encodings = match &caps.general {
|
||||||
Some(general) => general
|
Some(general) => general.position_encodings.as_deref().unwrap_or_default(),
|
||||||
.position_encodings
|
None => &[],
|
||||||
.as_deref()
|
};
|
||||||
.unwrap_or_default()
|
|
||||||
.iter()
|
for enc in client_encodings {
|
||||||
.any(|it| it == &PositionEncodingKind::UTF8),
|
if enc == &PositionEncodingKind::UTF8 {
|
||||||
_ => false,
|
return PositionEncoding::Utf8;
|
||||||
|
} else if enc == &PositionEncodingKind::UTF32 {
|
||||||
|
return PositionEncoding::Wide(WideEncoding::Utf32);
|
||||||
}
|
}
|
||||||
|
// NB: intentionally prefer just about anything else to utf-16.
|
||||||
|
}
|
||||||
|
|
||||||
|
PositionEncoding::Wide(WideEncoding::Utf16)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum MoveItem {}
|
pub enum MoveItem {}
|
||||||
|
|
|
@ -161,6 +161,7 @@ impl GlobalState {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn apply_document_changes(
|
pub(crate) fn apply_document_changes(
|
||||||
|
encoding: PositionEncoding,
|
||||||
file_contents: impl FnOnce() -> String,
|
file_contents: impl FnOnce() -> String,
|
||||||
mut content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>,
|
mut content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>,
|
||||||
) -> String {
|
) -> String {
|
||||||
|
@ -192,9 +193,9 @@ pub(crate) fn apply_document_changes(
|
||||||
let mut line_index = LineIndex {
|
let mut line_index = LineIndex {
|
||||||
// the index will be overwritten in the bottom loop's first iteration
|
// the index will be overwritten in the bottom loop's first iteration
|
||||||
index: Arc::new(ide::LineIndex::new(&text)),
|
index: Arc::new(ide::LineIndex::new(&text)),
|
||||||
// We don't care about line endings or offset encoding here.
|
// We don't care about line endings here.
|
||||||
endings: LineEndings::Unix,
|
endings: LineEndings::Unix,
|
||||||
encoding: PositionEncoding::Utf16,
|
encoding,
|
||||||
};
|
};
|
||||||
|
|
||||||
// The changes we got must be applied sequentially, but can cross lines so we
|
// The changes we got must be applied sequentially, but can cross lines so we
|
||||||
|
@ -256,6 +257,7 @@ pub(crate) fn all_edits_are_disjoint(
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use ide_db::line_index::WideEncoding;
|
||||||
use lsp_types::{
|
use lsp_types::{
|
||||||
CompletionItem, CompletionTextEdit, InsertReplaceEdit, Position, Range,
|
CompletionItem, CompletionTextEdit, InsertReplaceEdit, Position, Range,
|
||||||
TextDocumentContentChangeEvent,
|
TextDocumentContentChangeEvent,
|
||||||
|
@ -278,9 +280,11 @@ mod tests {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let text = apply_document_changes(|| String::new(), vec![]);
|
let encoding = PositionEncoding::Wide(WideEncoding::Utf16);
|
||||||
|
let text = apply_document_changes(encoding, || String::new(), vec![]);
|
||||||
assert_eq!(text, "");
|
assert_eq!(text, "");
|
||||||
let text = apply_document_changes(
|
let text = apply_document_changes(
|
||||||
|
encoding,
|
||||||
|| text,
|
|| text,
|
||||||
vec![TextDocumentContentChangeEvent {
|
vec![TextDocumentContentChangeEvent {
|
||||||
range: None,
|
range: None,
|
||||||
|
@ -289,39 +293,49 @@ mod tests {
|
||||||
}],
|
}],
|
||||||
);
|
);
|
||||||
assert_eq!(text, "the");
|
assert_eq!(text, "the");
|
||||||
let text = apply_document_changes(|| text, c![0, 3; 0, 3 => " quick"]);
|
let text = apply_document_changes(encoding, || text, c![0, 3; 0, 3 => " quick"]);
|
||||||
assert_eq!(text, "the quick");
|
assert_eq!(text, "the quick");
|
||||||
let text = apply_document_changes(|| text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
|
let text =
|
||||||
|
apply_document_changes(encoding, || text, c![0, 0; 0, 4 => "", 0, 5; 0, 5 => " foxes"]);
|
||||||
assert_eq!(text, "quick foxes");
|
assert_eq!(text, "quick foxes");
|
||||||
let text = apply_document_changes(|| text, c![0, 11; 0, 11 => "\ndream"]);
|
let text = apply_document_changes(encoding, || text, c![0, 11; 0, 11 => "\ndream"]);
|
||||||
assert_eq!(text, "quick foxes\ndream");
|
assert_eq!(text, "quick foxes\ndream");
|
||||||
let text = apply_document_changes(|| text, c![1, 0; 1, 0 => "have "]);
|
let text = apply_document_changes(encoding, || text, c![1, 0; 1, 0 => "have "]);
|
||||||
assert_eq!(text, "quick foxes\nhave dream");
|
assert_eq!(text, "quick foxes\nhave dream");
|
||||||
let text = apply_document_changes(
|
let text = apply_document_changes(
|
||||||
|
encoding,
|
||||||
|| text,
|
|| text,
|
||||||
c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"],
|
c![0, 0; 0, 0 => "the ", 1, 4; 1, 4 => " quiet", 1, 16; 1, 16 => "s\n"],
|
||||||
);
|
);
|
||||||
assert_eq!(text, "the quick foxes\nhave quiet dreams\n");
|
assert_eq!(text, "the quick foxes\nhave quiet dreams\n");
|
||||||
let text = apply_document_changes(|| text, c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"]);
|
let text = apply_document_changes(
|
||||||
|
encoding,
|
||||||
|
|| text,
|
||||||
|
c![0, 15; 0, 15 => "\n", 2, 17; 2, 17 => "\n"],
|
||||||
|
);
|
||||||
assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n");
|
assert_eq!(text, "the quick foxes\n\nhave quiet dreams\n\n");
|
||||||
let text = apply_document_changes(
|
let text = apply_document_changes(
|
||||||
|
encoding,
|
||||||
|| text,
|
|| text,
|
||||||
c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"],
|
c![1, 0; 1, 0 => "DREAM", 2, 0; 2, 0 => "they ", 3, 0; 3, 0 => "DON'T THEY?"],
|
||||||
);
|
);
|
||||||
assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n");
|
assert_eq!(text, "the quick foxes\nDREAM\nthey have quiet dreams\nDON'T THEY?\n");
|
||||||
let text = apply_document_changes(|| text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
|
let text =
|
||||||
|
apply_document_changes(encoding, || text, c![0, 10; 1, 5 => "", 2, 0; 2, 12 => ""]);
|
||||||
assert_eq!(text, "the quick \nthey have quiet dreams\n");
|
assert_eq!(text, "the quick \nthey have quiet dreams\n");
|
||||||
|
|
||||||
let text = String::from("❤️");
|
let text = String::from("❤️");
|
||||||
let text = apply_document_changes(|| text, c![0, 0; 0, 0 => "a"]);
|
let text = apply_document_changes(encoding, || text, c![0, 0; 0, 0 => "a"]);
|
||||||
assert_eq!(text, "a❤️");
|
assert_eq!(text, "a❤️");
|
||||||
|
|
||||||
let text = String::from("a\nb");
|
let text = String::from("a\nb");
|
||||||
let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
|
let text =
|
||||||
|
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "\nțc", 0, 1; 1, 1 => "d"]);
|
||||||
assert_eq!(text, "adcb");
|
assert_eq!(text, "adcb");
|
||||||
|
|
||||||
let text = String::from("a\nb");
|
let text = String::from("a\nb");
|
||||||
let text = apply_document_changes(|| text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
|
let text =
|
||||||
|
apply_document_changes(encoding, || text, c![0, 1; 1, 0 => "ț\nc", 0, 2; 0, 2 => "c"]);
|
||||||
assert_eq!(text, "ațc\ncb");
|
assert_eq!(text, "ațc\ncb");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -831,6 +831,7 @@ impl GlobalState {
|
||||||
let vfs = &mut this.vfs.write().0;
|
let vfs = &mut this.vfs.write().0;
|
||||||
let file_id = vfs.file_id(&path).unwrap();
|
let file_id = vfs.file_id(&path).unwrap();
|
||||||
let text = apply_document_changes(
|
let text = apply_document_changes(
|
||||||
|
this.config.position_encoding(),
|
||||||
|| std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into(),
|
|| std::str::from_utf8(vfs.file_contents(file_id)).unwrap().into(),
|
||||||
params.content_changes,
|
params.content_changes,
|
||||||
);
|
);
|
||||||
|
|
|
@ -31,8 +31,8 @@ pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp_types::P
|
||||||
let line_col = line_index.index.line_col(offset);
|
let line_col = line_index.index.line_col(offset);
|
||||||
match line_index.encoding {
|
match line_index.encoding {
|
||||||
PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col),
|
PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col),
|
||||||
PositionEncoding::Utf16 => {
|
PositionEncoding::Wide(enc) => {
|
||||||
let line_col = line_index.index.to_utf16(line_col);
|
let line_col = line_index.index.to_wide(enc, line_col);
|
||||||
lsp_types::Position::new(line_col.line, line_col.col)
|
lsp_types::Position::new(line_col.line, line_col.col)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1429,7 +1429,7 @@ fn main() {
|
||||||
let line_index = LineIndex {
|
let line_index = LineIndex {
|
||||||
index: Arc::new(ide::LineIndex::new(text)),
|
index: Arc::new(ide::LineIndex::new(text)),
|
||||||
endings: LineEndings::Unix,
|
endings: LineEndings::Unix,
|
||||||
encoding: PositionEncoding::Utf16,
|
encoding: PositionEncoding::Utf8,
|
||||||
};
|
};
|
||||||
let converted: Vec<lsp_types::FoldingRange> =
|
let converted: Vec<lsp_types::FoldingRange> =
|
||||||
folds.into_iter().map(|it| folding_range(text, &line_index, true, it)).collect();
|
folds.into_iter().map(|it| folding_range(text, &line_index, true, it)).collect();
|
||||||
|
|
|
@ -11,6 +11,7 @@ pub mod hash;
|
||||||
pub mod process;
|
pub mod process;
|
||||||
pub mod panic_context;
|
pub mod panic_context;
|
||||||
pub mod non_empty_vec;
|
pub mod non_empty_vec;
|
||||||
|
pub mod rand;
|
||||||
|
|
||||||
pub use always_assert::{always, never};
|
pub use always_assert::{always, never};
|
||||||
|
|
||||||
|
|
21
crates/stdx/src/rand.rs
Normal file
21
crates/stdx/src/rand.rs
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
//! We don't use `rand`, as that's too many things for us.
|
||||||
|
//!
|
||||||
|
//! Currently, we use oorandom instead, but it misses these two utilities.
|
||||||
|
//! Perhaps we should switch to `fastrand`, or our own small prng, it's not like
|
||||||
|
//! we need anything more complicated than xor-shift.
|
||||||
|
|
||||||
|
pub fn shuffle<T>(slice: &mut [T], mut rand_index: impl FnMut(usize) -> usize) {
|
||||||
|
let mut remaining = slice.len() - 1;
|
||||||
|
while remaining > 0 {
|
||||||
|
let index = rand_index(remaining);
|
||||||
|
slice.swap(remaining, index);
|
||||||
|
remaining -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn seed() -> u64 {
|
||||||
|
use std::collections::hash_map::RandomState;
|
||||||
|
use std::hash::{BuildHasher, Hasher};
|
||||||
|
|
||||||
|
RandomState::new().build_hasher().finish()
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
<!---
|
<!---
|
||||||
lsp_ext.rs hash: ec29403e67dfd15b
|
lsp_ext.rs hash: d87477896dfe41d4
|
||||||
|
|
||||||
If you need to change the above hash to make the test pass, please check if you
|
If you need to change the above hash to make the test pass, please check if you
|
||||||
need to adjust this doc as well and ping this issue:
|
need to adjust this doc as well and ping this issue:
|
||||||
|
|
Loading…
Reference in a new issue