Fix LSP non-ascii characters offset issues. (#14002)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->
This PR is supposed to fix #13582, #11522, as well as related goto
definition/reference issues (wrong position if non ascii characters
ahead).

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->

<img width="411" alt="image"
src="https://github.com/user-attachments/assets/9a81953c-81b2-490d-a842-14ccaefd6972">

Changes:
1. span/completion should use byte offset instead of character index
2. lsp Postions related ops in Ropey remain to use character index

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

Should be none, tested in neovim with config:
```lua
require("lspconfig").nushell.setup({
  cmd = {
    "nu",
    "-I",
    vim.fn.getcwd(),
    "--no-config-file",
    "--lsp",
  },
  filetypes = { "nu" },
})
```

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

tests::complete_command_with_utf_line parameters fixed to align with
true lsp requests (in character index, not byte).
As for the issue_11522.nu, manually tested:

<img width="520" alt="image"
src="https://github.com/user-attachments/assets/45496ba8-5a2d-4998-9190-d7bde31ee72c">


# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
zc he 2024-10-29 19:35:37 +08:00 committed by GitHub
parent f8d4adfb7a
commit 6cdc9e3b77
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 273 additions and 69 deletions

View file

@ -103,7 +103,7 @@ log = "0.4"
lru = "0.12"
lscolors = { version = "0.17", default-features = false }
lsp-server = "0.7.5"
lsp-types = "0.95.0"
lsp-types = { version = "0.95.0", features = ["proposed"] }
mach2 = "0.4"
md5 = { version = "0.10", package = "md-5" }
miette = "7.2"

View file

@ -45,7 +45,12 @@ impl LanguageServer {
let message = err.to_string();
diagnostics.diagnostics.push(Diagnostic {
range: Self::span_to_range(&err.span(), rope_of_file, offset),
range: Self::span_to_range(
&err.span(),
rope_of_file,
offset,
&self.position_encoding,
),
severity: Some(DiagnosticSeverity::ERROR),
message,
..Default::default()
@ -71,7 +76,7 @@ mod tests {
#[test]
fn publish_diagnostics_variable_does_not_exists() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -102,7 +107,7 @@ mod tests {
#[test]
fn publish_diagnostics_fixed_unknown_variable() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");

View file

@ -4,17 +4,18 @@ use lsp_types::{
request::{Completion, GotoDefinition, HoverRequest, Request},
CompletionItem, CompletionItemKind, CompletionParams, CompletionResponse, CompletionTextEdit,
GotoDefinitionParams, GotoDefinitionResponse, Hover, HoverContents, HoverParams, Location,
MarkupContent, MarkupKind, OneOf, Range, ServerCapabilities, TextDocumentSyncKind, TextEdit,
Url,
MarkupContent, MarkupKind, OneOf, Position, PositionEncodingKind, Range, ServerCapabilities,
TextDocumentSyncKind, TextEdit, Url,
};
use miette::{IntoDiagnostic, Result};
use nu_cli::{NuCompleter, SuggestionKind};
use nu_parser::{flatten_block, parse, FlatShape};
use nu_protocol::{
engine::{EngineState, Stack, StateWorkingSet},
engine::{CachedFile, EngineState, Stack, StateWorkingSet},
DeclId, Span, Value, VarId,
};
use ropey::Rope;
use serde_json::json;
use std::{
collections::BTreeMap,
path::{Path, PathBuf},
@ -36,6 +37,7 @@ pub struct LanguageServer {
connection: Connection,
io_threads: Option<IoThreads>,
ropes: BTreeMap<PathBuf, Rope>,
position_encoding: PositionEncodingKind,
}
impl LanguageServer {
@ -52,9 +54,21 @@ impl LanguageServer {
connection,
io_threads,
ropes: BTreeMap::new(),
position_encoding: PositionEncodingKind::UTF16,
})
}
fn get_offset_encoding(&self, initialization_params: serde_json::Value) -> String {
initialization_params
.pointer("/capabilities/offsetEncoding/0")
.unwrap_or(
initialization_params
.pointer("/capabilities/offset_encoding/0")
.unwrap_or(&json!("utf-16")),
)
.to_string()
}
pub fn serve_requests(mut self, engine_state: EngineState) -> Result<()> {
let server_capabilities = serde_json::to_value(ServerCapabilities {
text_document_sync: Some(lsp_types::TextDocumentSyncCapability::Kind(
@ -67,12 +81,14 @@ impl LanguageServer {
})
.expect("Must be serializable");
let _initialization_params = self
let initialization_params = self
.connection
.initialize_while(server_capabilities, || {
!engine_state.signals().interrupted()
})
.into_diagnostic()?;
self.position_encoding =
PositionEncodingKind::from(self.get_offset_encoding(initialization_params));
while !engine_state.signals().interrupted() {
let msg = match self
@ -174,29 +190,78 @@ impl LanguageServer {
}
}
fn span_to_range(span: &Span, rope_of_file: &Rope, offset: usize) -> lsp_types::Range {
let line = rope_of_file.byte_to_line(span.start - offset);
let character = span.start - offset - rope_of_file.line_to_char(line);
let start = lsp_types::Position {
line: line as u32,
character: character as u32,
};
let line = rope_of_file.byte_to_line(span.end - offset);
let character = span.end - offset - rope_of_file.line_to_char(line);
let end = lsp_types::Position {
line: line as u32,
character: character as u32,
};
lsp_types::Range { start, end }
fn span_to_range(
span: &Span,
rope_of_file: &Rope,
offset: usize,
position_encoding: &PositionEncodingKind,
) -> Range {
let start = Self::lsp_byte_offset_to_utf_cu_position(
span.start.saturating_sub(offset),
rope_of_file,
position_encoding,
);
let end = Self::lsp_byte_offset_to_utf_cu_position(
span.end.saturating_sub(offset),
rope_of_file,
position_encoding,
);
Range { start, end }
}
pub fn lsp_position_to_location(position: &lsp_types::Position, rope_of_file: &Rope) -> usize {
let line_idx = rope_of_file.line_to_char(position.line as usize);
line_idx + position.character as usize
fn lsp_byte_offset_to_utf_cu_position(
offset: usize,
rope_of_file: &Rope,
position_encoding: &PositionEncodingKind,
) -> Position {
let line = rope_of_file.try_byte_to_line(offset).unwrap_or(0);
match position_encoding.as_str() {
"\"utf-8\"" => {
let character = offset - rope_of_file.line_to_byte(line);
Position {
line: line as u32,
character: character as u32,
}
}
_ => {
let character = rope_of_file.char_to_utf16_cu(rope_of_file.byte_to_char(offset))
- rope_of_file.char_to_utf16_cu(rope_of_file.line_to_char(line));
Position {
line: line as u32,
character: character as u32,
}
}
}
}
fn utf16_cu_position_to_char(rope_of_file: &Rope, position: &Position) -> usize {
let line_utf_idx =
rope_of_file.char_to_utf16_cu(rope_of_file.line_to_char(position.line as usize));
rope_of_file.utf16_cu_to_char(line_utf_idx + position.character as usize)
}
pub fn lsp_position_to_location(
position: &Position,
rope_of_file: &Rope,
position_encoding: &PositionEncodingKind,
) -> usize {
match position_encoding.as_str() {
"\"utf-8\"" => rope_of_file.byte_to_char(
rope_of_file.line_to_byte(position.line as usize) + position.character as usize,
),
_ => Self::utf16_cu_position_to_char(rope_of_file, position),
}
}
fn lsp_position_to_byte_offset(&self, position: &Position, rope_of_file: &Rope) -> usize {
match self.position_encoding.as_str() {
"\"utf-8\"" => {
rope_of_file.line_to_byte(position.line as usize) + position.character as usize
}
_ => rope_of_file
.try_char_to_byte(Self::utf16_cu_position_to_char(rope_of_file, position))
.expect("Character index out of range!"),
}
}
fn find_id(
@ -240,7 +305,7 @@ impl LanguageServer {
}
fn read_in_file<'a>(
&mut self,
&self,
engine_state: &'a mut EngineState,
file_url: &Url,
) -> Option<(&Rope, &PathBuf, StateWorkingSet<'a>)> {
@ -253,6 +318,15 @@ impl LanguageServer {
Some((file, path, working_set))
}
fn rope_file_from_cached_file(&mut self, cached_file: &CachedFile) -> Result<(Url, &Rope), ()> {
let uri = Url::from_file_path(&*cached_file.name)?;
let rope_of_file = self.ropes.entry(uri.to_file_path()?).or_insert_with(|| {
let raw_string = String::from_utf8_lossy(&cached_file.content);
Rope::from_str(&raw_string)
});
Ok((uri, rope_of_file))
}
fn goto_definition(
&mut self,
engine_state: &mut EngineState,
@ -270,7 +344,7 @@ impl LanguageServer {
&mut working_set,
path,
file,
Self::lsp_position_to_location(&params.text_document_position_params.position, file),
self.lsp_position_to_byte_offset(&params.text_document_position_params.position, file),
)?;
match id {
@ -280,12 +354,16 @@ impl LanguageServer {
if let Some(span) = &block.span {
for cached_file in working_set.files() {
if cached_file.covered_span.contains(span.start) {
let position_encoding = self.position_encoding.clone();
let (uri, rope_of_file) =
self.rope_file_from_cached_file(cached_file).ok()?;
return Some(GotoDefinitionResponse::Scalar(Location {
uri: Url::from_file_path(&*cached_file.name).ok()?,
uri,
range: Self::span_to_range(
span,
file,
rope_of_file,
cached_file.covered_span.start,
&position_encoding,
),
}));
}
@ -300,16 +378,16 @@ impl LanguageServer {
.covered_span
.contains(var.declaration_span.start)
{
let position_encoding = self.position_encoding.clone();
let (uri, rope_of_file) =
self.rope_file_from_cached_file(cached_file).ok()?;
return Some(GotoDefinitionResponse::Scalar(Location {
uri: params
.text_document_position_params
.text_document
.uri
.clone(),
uri,
range: Self::span_to_range(
&var.declaration_span,
file,
rope_of_file,
cached_file.covered_span.start,
&position_encoding,
),
}));
}
@ -333,7 +411,7 @@ impl LanguageServer {
&mut working_set,
path,
file,
Self::lsp_position_to_location(&params.text_document_position_params.position, file),
self.lsp_position_to_byte_offset(&params.text_document_position_params.position, file),
)?;
match id {
@ -552,7 +630,7 @@ impl LanguageServer {
NuCompleter::new(Arc::new(engine_state.clone()), Arc::new(Stack::new()));
let location =
Self::lsp_position_to_location(&params.text_document_position.position, rope_of_file);
self.lsp_position_to_byte_offset(&params.text_document_position.position, rope_of_file);
let results =
completer.fetch_completions_at(&rope_of_file.to_string()[..location], location);
if results.is_empty() {
@ -618,7 +696,9 @@ mod tests {
use nu_test_support::fs::{fixtures, root};
use std::sync::mpsc::Receiver;
pub fn initialize_language_server() -> (Connection, Receiver<Result<()>>) {
pub fn initialize_language_server(
client_offset_encoding: Option<Vec<String>>,
) -> (Connection, Receiver<Result<()>>) {
use std::sync::mpsc;
let (client_connection, server_connection) = Connection::memory();
let lsp_server = LanguageServer::initialize_connection(server_connection, None).unwrap();
@ -636,6 +716,10 @@ mod tests {
id: 1.into(),
method: Initialize::METHOD.to_string(),
params: serde_json::to_value(InitializeParams {
capabilities: lsp_types::ClientCapabilities {
offset_encoding: client_offset_encoding,
..Default::default()
},
..Default::default()
})
.unwrap(),
@ -659,7 +743,7 @@ mod tests {
#[test]
fn shutdown_on_request() {
let (client_connection, recv) = initialize_language_server();
let (client_connection, recv) = initialize_language_server(None);
client_connection
.sender
@ -685,7 +769,7 @@ mod tests {
#[test]
fn goto_definition_for_none_existing_file() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut none_existent_path = root();
none_existent_path.push("none-existent.nu");
@ -700,7 +784,7 @@ mod tests {
text_document: TextDocumentIdentifier {
uri: Url::from_file_path(none_existent_path).unwrap(),
},
position: lsp_types::Position {
position: Position {
line: 0,
character: 0,
},
@ -817,7 +901,7 @@ mod tests {
params: serde_json::to_value(GotoDefinitionParams {
text_document_position_params: TextDocumentPositionParams {
text_document: TextDocumentIdentifier { uri },
position: lsp_types::Position { line, character },
position: Position { line, character },
},
work_done_progress_params: WorkDoneProgressParams::default(),
partial_result_params: PartialResultParams::default(),
@ -834,7 +918,7 @@ mod tests {
#[test]
fn goto_definition_of_variable() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -865,7 +949,7 @@ mod tests {
#[test]
fn goto_definition_of_command() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -894,9 +978,72 @@ mod tests {
);
}
#[test]
fn goto_definition_of_command_utf8() {
let (client_connection, _recv) =
initialize_language_server(Some(vec!["utf-8".to_string(), "utf-16".to_string()]));
let mut script = fixtures();
script.push("lsp");
script.push("goto");
script.push("command_unicode.nu");
let script = Url::from_file_path(script).unwrap();
open_unchecked(&client_connection, script.clone());
let resp = goto_definition(&client_connection, script.clone(), 4, 1);
let result = if let Message::Response(response) = resp {
response.result
} else {
panic!()
};
assert_json_eq!(
result,
serde_json::json!({
"uri": script,
"range": {
"start": { "line": 0, "character": 28 },
"end": { "line": 2, "character": 1 }
}
})
);
}
#[test]
fn goto_definition_of_command_utf16() {
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
script.push("goto");
script.push("command_unicode.nu");
let script = Url::from_file_path(script).unwrap();
open_unchecked(&client_connection, script.clone());
let resp = goto_definition(&client_connection, script.clone(), 4, 1);
let result = if let Message::Response(response) = resp {
response.result
} else {
panic!()
};
assert_json_eq!(
result,
serde_json::json!({
"uri": script,
"range": {
"start": { "line": 0, "character": 19 },
"end": { "line": 2, "character": 1 }
}
})
);
}
#[test]
fn goto_definition_of_command_parameter() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -934,7 +1081,7 @@ mod tests {
params: serde_json::to_value(HoverParams {
text_document_position_params: TextDocumentPositionParams {
text_document: TextDocumentIdentifier { uri },
position: lsp_types::Position { line, character },
position: Position { line, character },
},
work_done_progress_params: WorkDoneProgressParams::default(),
})
@ -950,7 +1097,7 @@ mod tests {
#[test]
fn hover_on_variable() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -977,7 +1124,7 @@ mod tests {
#[test]
fn hover_on_custom_command() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -1007,7 +1154,7 @@ mod tests {
#[test]
fn hover_on_str_join() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -1044,7 +1191,7 @@ mod tests {
params: serde_json::to_value(CompletionParams {
text_document_position: TextDocumentPositionParams {
text_document: TextDocumentIdentifier { uri },
position: lsp_types::Position { line, character },
position: Position { line, character },
},
work_done_progress_params: WorkDoneProgressParams::default(),
partial_result_params: PartialResultParams::default(),
@ -1062,7 +1209,7 @@ mod tests {
#[test]
fn complete_on_variable() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -1099,7 +1246,7 @@ mod tests {
#[test]
fn complete_command_with_space() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -1136,8 +1283,9 @@ mod tests {
}
#[test]
fn complete_command_with_utf_line() {
let (client_connection, _recv) = initialize_language_server();
fn complete_command_with_utf8_line() {
let (client_connection, _recv) =
initialize_language_server(Some(vec!["utf-8".to_string()]));
let mut script = fixtures();
script.push("lsp");
@ -1173,9 +1321,48 @@ mod tests {
);
}
#[test]
fn complete_command_with_utf16_line() {
let (client_connection, _recv) =
initialize_language_server(Some(vec!["utf-16".to_string()]));
let mut script = fixtures();
script.push("lsp");
script.push("completion");
script.push("utf_pipeline.nu");
let script = Url::from_file_path(script).unwrap();
open_unchecked(&client_connection, script.clone());
let resp = complete(&client_connection, script, 0, 13);
let result = if let Message::Response(response) = resp {
response.result
} else {
panic!()
};
assert_json_eq!(
result,
serde_json::json!([
{
"label": "str trim",
"detail": "Trim whitespace or specific character.",
"textEdit": {
"range": {
"start": { "line": 0, "character": 8 },
"end": { "line": 0, "character": 13 },
},
"newText": "str trim"
},
"kind": 3
}
])
);
}
#[test]
fn complete_keyword() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");

View file

@ -64,20 +64,27 @@ impl LanguageServer {
if let Ok(file_path) = params.text_document.uri.to_file_path() {
for content_change in params.content_changes.into_iter() {
let entry = self.ropes.entry(file_path.clone());
match (content_change.range, content_change.range) {
(Some(range), _) => {
match content_change.range {
Some(range) => {
entry.and_modify(|rope| {
let start = Self::lsp_position_to_location(&range.start, rope);
let end = Self::lsp_position_to_location(&range.end, rope);
let start = Self::lsp_position_to_location(
&range.start,
rope,
&self.position_encoding,
);
let end = Self::lsp_position_to_location(
&range.end,
rope,
&self.position_encoding,
);
rope.remove(start..end);
rope.insert(start, &content_change.text);
});
}
(None, None) => {
None => {
entry.and_modify(|r| *r = Rope::from_str(&content_change.text));
}
_ => {}
}
}
@ -99,7 +106,7 @@ mod tests {
#[test]
fn hover_correct_documentation_on_let() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -129,7 +136,7 @@ mod tests {
#[test]
fn hover_on_command_after_full_content_change() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -170,7 +177,7 @@ hello"#,
#[test]
fn hover_on_command_after_partial_content_change() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");
@ -215,7 +222,7 @@ hello"#,
#[test]
fn open_document_with_utf_char() {
let (client_connection, _recv) = initialize_language_server();
let (client_connection, _recv) = initialize_language_server(None);
let mut script = fixtures();
script.push("lsp");

View file

@ -0,0 +1,5 @@
def 🐚🤖啊あà [name] {
$"hello ($name)"
}
🐚🤖啊あà nushell