10181: Beginning of lsif r=HKalbasi a=HKalbasi

This PR adds an `lsif` command to the CLI, which can be used as `rust-analyzer lsif /path/to/project > dump.lsif`. For now it generates a valid but pretty useless LSIF dump (only folding ranges are supported). The purpose of this PR is to discuss the structure of the LSIF generator before starting anything serious.

cc `@matklad` #8696 #3098


Co-authored-by: hamidreza kalbasi <hamidrezakalbasi@protonmail.com>
This commit is contained in:
bors[bot] 2021-09-29 13:13:03 +00:00 committed by GitHub
commit ee12b0f173
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 562 additions and 5 deletions

2
.gitignore vendored
View file

@ -11,3 +11,5 @@ generated_assists.adoc
generated_features.adoc
generated_diagnostic.adoc
.DS_Store
/out/
/dump.lsif

4
Cargo.lock generated
View file

@ -862,9 +862,9 @@ dependencies = [
[[package]]
name = "lsp-types"
version = "0.89.2"
version = "0.90.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852e0dedfd52cc32325598b2631e0eba31b7b708959676a9f837042f276b09a2"
checksum = "a7404037aab080771c90b0a499836d9d8a10336ecd07badf969567b65c6d51a1"
dependencies = [
"bitflags",
"serde",

View file

@ -66,3 +66,22 @@ pub(crate) fn annotations(ra_fixture: &str) -> (Analysis, FilePosition, Vec<(Fil
.collect();
(host.analysis(), FilePosition { file_id, offset }, annotations)
}
/// Creates analysis from a multi-file fixture with annotations but without a `$0` marker.
///
/// Returns the analysis together with every annotation found in the fixture
/// files, each paired with the file range it was extracted for.
pub(crate) fn annotations_without_marker(ra_fixture: &str) -> (Analysis, Vec<(FileRange, String)>) {
    let mut host = AnalysisHost::default();
    let change_fixture = ChangeFixture::parse(ra_fixture);
    host.db.set_enable_proc_attr_macros(true);
    host.db.apply_change(change_fixture.change);

    // Walk the fixture files in order and gather the annotations of each one.
    let mut collected = Vec::new();
    for &file_id in change_fixture.files.iter() {
        let text = host.analysis().file_text(file_id).unwrap();
        collected.extend(
            extract_annotations(&text)
                .into_iter()
                .map(|(range, data)| (FileRange { file_id, range }, data)),
        );
    }
    (host.analysis(), collected)
}

View file

@ -46,6 +46,7 @@ mod references;
mod rename;
mod runnables;
mod ssr;
mod static_index;
mod status;
mod syntax_highlighting;
mod syntax_tree;
@ -86,6 +87,7 @@ pub use crate::{
references::ReferenceSearchResult,
rename::RenameError,
runnables::{Runnable, RunnableKind, TestId},
static_index::{StaticIndex, StaticIndexedFile, TokenId, TokenStaticData},
syntax_highlighting::{
tags::{Highlight, HlMod, HlMods, HlOperator, HlPunct, HlTag},
HlRange,

View file

@ -0,0 +1,256 @@
//! This module provides `StaticIndex` which is used for powering
//! read-only code browsers and emitting LSIF
use std::collections::HashMap;
use hir::Semantics;
use hir::{db::HirDatabase, Crate, Module};
use ide_db::base_db::{FileId, FileRange, SourceDatabaseExt};
use ide_db::defs::Definition;
use ide_db::RootDatabase;
use rustc_hash::FxHashSet;
use syntax::{AstNode, SyntaxKind::*, T};
use syntax::{SyntaxToken, TextRange};
use crate::display::TryToNav;
use crate::hover::hover_for_definition;
use crate::{Analysis, Fold, HoverConfig, HoverDocFormat, HoverResult};
/// A static representation of fully analyzed source code.
///
/// The intended use-case is powering read-only code browsers and emitting LSIF
pub struct StaticIndex<'a> {
/// One entry per indexed file, in the order the files were added.
pub files: Vec<StaticIndexedFile>,
/// Per-definition data; entries are referenced from `files` by `TokenId`.
pub tokens: TokenStore,
/// Used to compute folding ranges for each file.
analysis: &'a Analysis,
/// Database backing semantic queries (parsing, definition lookup, navigation).
db: &'a RootDatabase,
/// Maps an already seen definition to its entry in `tokens`, so that every
/// occurrence of the same definition shares a single `TokenId`.
def_map: HashMap<Definition, TokenId>,
}
/// A single occurrence of a token that resolves to some definition.
pub struct ReferenceData {
/// File and text range of the occurrence.
pub range: FileRange,
/// `true` when this occurrence is located exactly at the navigation target
/// of the definition it resolves to (same file, same range).
pub is_definition: bool,
}
/// Everything the index stores about one definition.
pub struct TokenStaticData {
/// Hover computed for the definition at its first indexed occurrence, if any.
pub hover: Option<HoverResult>,
/// Navigation target of the definition (focus range when available,
/// otherwise the full range); `None` when the definition cannot be navigated to.
pub definition: Option<FileRange>,
/// All indexed occurrences that resolve to this definition; an occurrence
/// at the definition site itself is included and flagged via `is_definition`.
pub references: Vec<ReferenceData>,
}
/// Handle to an entry of a `TokenStore`.
///
/// Wraps the position of the entry in the store's backing vector; ids are
/// only produced by `TokenStore::insert` and `TokenStore::iter`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenId(usize);
#[derive(Default)]
pub struct TokenStore(Vec<TokenStaticData>);
impl TokenStore {
pub fn insert(&mut self, data: TokenStaticData) -> TokenId {
let id = TokenId(self.0.len());
self.0.push(data);
id
}
pub fn get_mut(&mut self, id: TokenId) -> Option<&mut TokenStaticData> {
self.0.get_mut(id.0)
}
pub fn get(&self, id: TokenId) -> Option<&TokenStaticData> {
self.0.get(id.0)
}
pub fn iter(self) -> impl Iterator<Item = (TokenId, TokenStaticData)> {
self.0.into_iter().enumerate().map(|(i, x)| (TokenId(i), x))
}
}
/// Index data collected for a single source file.
pub struct StaticIndexedFile {
/// The file this entry describes.
pub file_id: FileId,
/// Folding ranges of the file.
pub folds: Vec<Fold>,
/// Range of every indexed token in the file, paired with the id of the
/// definition data it resolves to.
pub tokens: Vec<(TextRange, TokenId)>,
}
/// Collects every module of every crate known to `db`.
///
/// Starts from the root module of each crate and walks down through the
/// children, using a stack of modules that still have to be visited.
fn all_modules(db: &dyn HirDatabase) -> Vec<Module> {
    let mut pending: Vec<Module> = Vec::new();
    for krate in Crate::all(db) {
        pending.push(krate.root_module(db));
    }

    let mut visited = Vec::new();
    while let Some(current) = pending.pop() {
        pending.extend(current.children(db));
        visited.push(current);
    }
    visited
}
impl StaticIndex<'_> {
    /// Indexes a single file and appends the result to `self.files`.
    ///
    /// Records the folding ranges of the file and, for every identifier,
    /// integer literal, lifetime identifier and `self`/`super`/`crate` token
    /// that resolves to a definition, registers the token as an occurrence of
    /// that definition. Hover and navigation data are computed the first time
    /// a definition is encountered and shared by all later occurrences.
    ///
    /// # Panics
    ///
    /// Panics if computing the folding ranges fails.
    fn add_file(&mut self, file_id: FileId) {
        let folds = self.analysis.folding_ranges(file_id).unwrap();
        let sema = hir::Semantics::new(self.db);
        let root = sema.parse(file_id).syntax().clone();
        let hover_config =
            HoverConfig { links_in_hover: true, documentation: Some(HoverDocFormat::Markdown) };

        // Only these token kinds are looked up; everything else is skipped.
        let tokens = root
            .descendants_with_tokens()
            .filter_map(|element| match element {
                syntax::NodeOrToken::Node(_) => None,
                syntax::NodeOrToken::Token(token) => Some(token),
            })
            .filter(|token| {
                matches!(
                    token.kind(),
                    IDENT | INT_NUMBER | LIFETIME_IDENT | T![self] | T![super] | T![crate]
                )
            });

        let mut result = StaticIndexedFile { file_id, folds, tokens: vec![] };
        for token in tokens {
            let range = token.text_range();
            let node = token.parent().unwrap();
            let def = match get_definition(&sema, token) {
                Some(def) => def,
                None => continue,
            };

            // Resolve the navigation target once; it feeds both the stored
            // definition location and the `is_definition` flag below.
            let definition = def.try_to_nav(self.db).map(|nav| FileRange {
                file_id: nav.file_id,
                range: nav.focus_or_full_range(),
            });

            let id = match self.def_map.get(&def).copied() {
                Some(id) => id,
                None => {
                    let id = self.tokens.insert(TokenStaticData {
                        hover: hover_for_definition(&sema, file_id, def, &node, &hover_config),
                        definition,
                        references: vec![],
                    });
                    self.def_map.insert(def, id);
                    id
                }
            };

            let is_definition = definition
                .map_or(false, |target| target.file_id == file_id && target.range == range);
            // `id` was either read from `def_map` or just returned by `insert`,
            // so the entry is always present.
            let data = self.tokens.get_mut(id).expect("token id must refer to a stored token");
            data.references.push(ReferenceData {
                range: FileRange { range, file_id },
                is_definition,
            });
            result.tokens.push((range, id));
        }
        self.files.push(result);
    }

    /// Builds the index for every file that defines a module outside of
    /// library source roots.
    ///
    /// Each file is indexed at most once, even when several modules are
    /// defined in it.
    pub fn compute<'a>(db: &'a RootDatabase, analysis: &'a Analysis) -> StaticIndex<'a> {
        let mut this = StaticIndex {
            files: vec![],
            tokens: Default::default(),
            analysis,
            db,
            def_map: Default::default(),
        };
        let mut visited_files = FxHashSet::default();
        for module in all_modules(db) {
            let file_id = module.definition_source(db).file_id.original_file(db);
            let source_root = db.source_root(db.file_source_root(file_id));
            if source_root.is_library {
                continue;
            }
            // `insert` returns `false` when the file has been seen already.
            if !visited_files.insert(file_id) {
                continue;
            }
            this.add_file(file_id);
        }
        this
    }
}
/// Resolves `token` to the definition it refers to.
///
/// The token is first descended into macro expansions; the first resulting
/// token that resolves to exactly one definition decides the answer. Returns
/// `None` when no descended token resolves unambiguously.
fn get_definition(sema: &Semantics<RootDatabase>, token: SyntaxToken) -> Option<Definition> {
    sema.descend_into_macros_many(token).into_iter().find_map(|descended| {
        match Definition::from_token(sema, &descended).as_slice() {
            [single] => Some(*single),
            _ => None,
        }
    })
}
#[cfg(test)]
mod tests {
use crate::{fixture, StaticIndex};
use ide_db::base_db::FileRange;
use std::collections::HashSet;
/// Asserts that the token ranges recorded by `StaticIndex::compute` are
/// exactly the ranges annotated in `ra_fixture`.
///
/// Panics on a recorded range without a matching annotation and on
/// annotations that no recorded range matched.
fn check_all_ranges(ra_fixture: &str) {
let (analysis, ranges) = fixture::annotations_without_marker(ra_fixture);
let s = StaticIndex::compute(&*analysis.db, &analysis);
let mut range_set: HashSet<_> = ranges.iter().map(|x| x.0).collect();
for f in s.files {
for (range, _) in f.tokens {
let x = FileRange { file_id: f.file_id, range };
if !range_set.contains(&x) {
panic!("additional range {:?}", x);
}
// Matched annotations are removed, so a range recorded twice is
// reported as additional the second time.
range_set.remove(&x);
}
}
if !range_set.is_empty() {
panic!("unfound ranges {:?}", range_set);
}
}
/// Asserts that the definition locations stored for the indexed tokens are
/// exactly the ranges annotated in `ra_fixture`.
///
/// Tokens without a definition location are ignored. Panics on a definition
/// without a matching annotation and on annotations left unmatched.
fn check_definitions(ra_fixture: &str) {
let (analysis, ranges) = fixture::annotations_without_marker(ra_fixture);
let s = StaticIndex::compute(&*analysis.db, &analysis);
let mut range_set: HashSet<_> = ranges.iter().map(|x| x.0).collect();
for (_, t) in s.tokens.iter() {
if let Some(x) = t.definition {
if !range_set.contains(&x) {
panic!("additional definition {:?}", x);
}
range_set.remove(&x);
}
}
if !range_set.is_empty() {
panic!("unfound definitions {:?}", range_set);
}
}
// Checks both the indexed token ranges and the definition locations for a
// struct that is used inside an enum variant.
#[test]
fn struct_and_enum() {
check_all_ranges(
r#"
struct Foo;
//^^^
enum E { X(Foo) }
//^ ^ ^^^
"#,
);
check_definitions(
r#"
struct Foo;
//^^^
enum E { X(Foo) }
//^ ^
"#,
);
}
// Checks the indexed token ranges when a derive attribute is involved.
#[test]
fn derives() {
check_all_ranges(
r#"
#[rustc_builtin_macro]
pub macro Copy {}
//^^^^
#[rustc_builtin_macro]
pub macro derive {}
//^^^^^^
#[derive(Copy)]
//^^^^^^ ^^^^
struct Hello(i32);
//^^^^^ ^^^
"#,
);
}
}

View file

@ -22,7 +22,7 @@ crossbeam-channel = "0.5.0"
dissimilar = "1.0.2"
itertools = "0.10.0"
jod-thread = "0.1.0"
lsp-types = { version = "0.89.0", features = ["proposed"] }
lsp-types = { version = "0.90.0", features = ["proposed"] }
parking_lot = "0.11.0"
xflags = "0.2.1"
oorandom = "11.1.2"

View file

@ -87,6 +87,7 @@ fn try_main() -> Result<()> {
flags::RustAnalyzerCmd::Diagnostics(cmd) => cmd.run()?,
flags::RustAnalyzerCmd::Ssr(cmd) => cmd.run()?,
flags::RustAnalyzerCmd::Search(cmd) => cmd.run()?,
flags::RustAnalyzerCmd::Lsif(cmd) => cmd.run()?,
}
Ok(())
}

View file

@ -8,6 +8,7 @@ mod highlight;
mod analysis_stats;
mod diagnostics;
mod ssr;
mod lsif;
mod progress_report;

View file

@ -367,8 +367,6 @@ fn expr_syntax_range(
) -> Option<(VfsPath, LineCol, LineCol)> {
let src = sm.expr_syntax(expr_id);
if let Ok(src) = src {
// FIXME: it might be nice to have a function (on Analysis?) that goes from Source<T> -> (LineCol, LineCol) directly
// But also, we should just turn the type mismatches into diagnostics and provide these
let root = db.parse_or_expand(src.file_id).unwrap();
let node = src.map(|e| e.to_node(&root).syntax().clone());
let original_range = node.as_ref().original_file_range(db);

View file

@ -102,6 +102,10 @@ xflags::xflags! {
}
cmd proc-macro {}
cmd lsif
required path: PathBuf
{}
}
}
@ -129,6 +133,7 @@ pub enum RustAnalyzerCmd {
Ssr(Ssr),
Search(Search),
ProcMacro(ProcMacro),
Lsif(Lsif),
}
#[derive(Debug)]
@ -190,6 +195,11 @@ pub struct Search {
#[derive(Debug)]
pub struct ProcMacro;
/// Arguments of the `lsif` subcommand.
#[derive(Debug)]
pub struct Lsif {
/// Required path argument of the subcommand.
pub path: PathBuf,
}
impl RustAnalyzer {
pub const HELP: &'static str = Self::HELP_;

View file

@ -0,0 +1,268 @@
//! LSIF (language server index format) generator
use std::collections::HashMap;
use std::env;
use std::time::Instant;
use ide::{
Analysis, FileId, FileRange, RootDatabase, StaticIndex, StaticIndexedFile, TokenId,
TokenStaticData,
};
use ide_db::LineIndexDatabase;
use ide_db::base_db::salsa::{self, ParallelDatabase};
use lsp_types::{self, lsif};
use project_model::{CargoConfig, ProjectManifest, ProjectWorkspace};
use vfs::{AbsPathBuf, Vfs};
use crate::cli::{
flags,
load_cargo::{load_workspace, LoadCargoConfig},
Result,
};
use crate::line_index::{LineEndings, LineIndex, OffsetEncoding};
use crate::to_proto;
/// Need to wrap Snapshot to provide `Clone` impl for `map_with`
// NOTE(review): nothing in the visible part of this file uses `Snap` — confirm
// whether it is still needed here.
struct Snap<DB>(DB);
impl<DB: ParallelDatabase> Clone for Snap<salsa::Snapshot<DB>> {
// Cloning takes a new snapshot of the wrapped database via `snapshot()`.
fn clone(&self) -> Snap<salsa::Snapshot<DB>> {
Snap(self.0.snapshot())
}
}
/// Emits LSIF elements and keeps track of the ids assigned to them.
struct LsifManager<'a> {
/// Id that will be assigned to the next emitted element.
count: i32,
/// Result-set vertex created for each token id.
token_map: HashMap<TokenId, Id>,
/// Range vertex created for each file range.
range_map: HashMap<FileRange, Id>,
/// Document vertex created for each file.
file_map: HashMap<FileId, Id>,
/// Used to read file texts when converting folding ranges.
analysis: &'a Analysis,
/// Used to obtain the line index of a file.
db: &'a RootDatabase,
/// Used to resolve a file id to its path.
vfs: &'a Vfs,
}
#[derive(Clone, Copy)]
struct Id(i32);
impl From<Id> for lsp_types::NumberOrString {
fn from(Id(x): Id) -> Self {
lsp_types::NumberOrString::Number(x)
}
}
impl LsifManager<'_> {
    /// Creates a manager that has not emitted anything yet; ids start at 0.
    fn new<'a>(analysis: &'a Analysis, db: &'a RootDatabase, vfs: &'a Vfs) -> LsifManager<'a> {
        LsifManager {
            count: 0,
            token_map: HashMap::default(),
            range_map: HashMap::default(),
            file_map: HashMap::default(),
            analysis,
            db,
            vfs,
        }
    }

    /// Assigns the next free id to `data`, emits it as one JSON line and
    /// returns the id.
    fn add(&mut self, data: lsif::Element) -> Id {
        let id = Id(self.count);
        self.emit(&serde_json::to_string(&lsif::Entry { id: id.into(), data }).unwrap());
        self.count += 1;
        id
    }

    /// Emits `vertex` and returns its id.
    fn add_vertex(&mut self, vertex: lsif::Vertex) -> Id {
        self.add(lsif::Element::Vertex(vertex))
    }

    /// Emits `edge` and returns its id.
    fn add_edge(&mut self, edge: lsif::Edge) -> Id {
        self.add(lsif::Element::Edge(edge))
    }

    // FIXME: support file in addition to stdout here
    /// Writes one line of output to stdout.
    fn emit(&self, data: &str) {
        println!("{}", data);
    }

    /// Builds the UTF-16, Unix-line-ending line index used to convert text
    /// ranges of `file_id` into protocol ranges.
    fn proto_line_index(&self, file_id: FileId) -> LineIndex {
        LineIndex {
            index: self.db.line_index(file_id),
            encoding: OffsetEncoding::Utf16,
            endings: LineEndings::Unix,
        }
    }

    /// Returns the result-set vertex of token `id`, emitting it on first use.
    fn get_token_id(&mut self, id: TokenId) -> Id {
        if let Some(known) = self.token_map.get(&id) {
            return *known;
        }
        let result_set_id = self.add_vertex(lsif::Vertex::ResultSet(lsif::ResultSet { key: None }));
        self.token_map.insert(id, result_set_id);
        result_set_id
    }

    /// Returns the range vertex of `id`, emitting it (together with the
    /// `contains` edge from its document) on first use.
    fn get_range_id(&mut self, id: FileRange) -> Id {
        if let Some(known) = self.range_map.get(&id) {
            return *known;
        }
        let file_id = id.file_id;
        let doc_id = self.get_file_id(file_id);
        let line_index = self.proto_line_index(file_id);
        let range_id = self.add_vertex(lsif::Vertex::Range {
            range: to_proto::range(&line_index, id.range),
            tag: None,
        });
        self.add_edge(lsif::Edge::Contains(lsif::EdgeDataMultiIn {
            in_vs: vec![range_id.into()],
            out_v: doc_id.into(),
        }));
        // Remember the vertex; otherwise the lookup above can never hit for
        // ranges created here and every request would emit a duplicate vertex.
        self.range_map.insert(id, range_id);
        range_id
    }

    /// Returns the document vertex of file `id`, emitting it on first use.
    ///
    /// # Panics
    ///
    /// Panics if the file has no filesystem path or the path cannot be
    /// converted into a URL.
    fn get_file_id(&mut self, id: FileId) -> Id {
        if let Some(known) = self.file_map.get(&id) {
            return *known;
        }
        let path = self.vfs.file_path(id);
        let path = path.as_path().unwrap();
        let doc_id = self.add_vertex(lsif::Vertex::Document(lsif::Document {
            language_id: "rust".to_string(),
            uri: lsp_types::Url::from_file_path(path).unwrap(),
        }));
        self.file_map.insert(id, doc_id);
        doc_id
    }

    /// Emits the hover, definition and reference results of one token and
    /// links them to the token's result set.
    ///
    /// Range and document vertices are obtained through the memoizing
    /// getters, so vertices emitted earlier by `add_file` are reused and
    /// missing ones are created instead of panicking.
    fn add_token(&mut self, id: TokenId, token: TokenStaticData) {
        let result_set_id = self.get_token_id(id);
        if let Some(hover) = token.hover {
            let hover_id = self.add_vertex(lsif::Vertex::HoverResult {
                result: lsp_types::Hover {
                    contents: lsp_types::HoverContents::Markup(to_proto::markup_content(
                        hover.markup,
                    )),
                    range: None,
                },
            });
            self.add_edge(lsif::Edge::Hover(lsif::EdgeData {
                in_v: hover_id.into(),
                out_v: result_set_id.into(),
            }));
        }
        if let Some(def) = token.definition {
            let result_id = self.add_vertex(lsif::Vertex::DefinitionResult);
            let def_vertex = self.get_range_id(def);
            let document = self.get_file_id(def.file_id);
            self.add_edge(lsif::Edge::Item(lsif::Item {
                document: document.into(),
                property: None,
                edge_data: lsif::EdgeDataMultiIn {
                    in_vs: vec![def_vertex.into()],
                    out_v: result_id.into(),
                },
            }));
            self.add_edge(lsif::Edge::Definition(lsif::EdgeData {
                in_v: result_id.into(),
                out_v: result_set_id.into(),
            }));
        }
        if !token.references.is_empty() {
            let result_id = self.add_vertex(lsif::Vertex::ReferenceResult);
            self.add_edge(lsif::Edge::References(lsif::EdgeData {
                in_v: result_id.into(),
                out_v: result_set_id.into(),
            }));
            for reference in token.references {
                let vertex = self.get_range_id(reference.range);
                let document = self.get_file_id(reference.range.file_id);
                let property = if reference.is_definition {
                    lsif::ItemKind::Definitions
                } else {
                    lsif::ItemKind::References
                };
                self.add_edge(lsif::Edge::Item(lsif::Item {
                    document: document.into(),
                    property: Some(property),
                    edge_data: lsif::EdgeDataMultiIn {
                        in_vs: vec![vertex.into()],
                        out_v: result_id.into(),
                    },
                }));
            }
        }
    }

    /// Emits the document of `file`, its folding ranges and one range vertex
    /// per indexed token, each linked to the result set of its token.
    ///
    /// # Panics
    ///
    /// Panics if the text of the file cannot be read.
    fn add_file(&mut self, file: StaticIndexedFile) {
        let StaticIndexedFile { file_id, tokens, folds } = file;
        let doc_id = self.get_file_id(file_id);
        let text = self.analysis.file_text(file_id).unwrap();
        let line_index = self.proto_line_index(file_id);
        let result = folds
            .into_iter()
            .map(|it| to_proto::folding_range(&*text, &line_index, false, it))
            .collect();
        let folding_id = self.add_vertex(lsif::Vertex::FoldingRangeResult { result });
        self.add_edge(lsif::Edge::FoldingRange(lsif::EdgeData {
            in_v: folding_id.into(),
            out_v: doc_id.into(),
        }));

        let mut range_ids: Vec<lsp_types::NumberOrString> = Vec::with_capacity(tokens.len());
        for (range, id) in tokens {
            let range_id = self.add_vertex(lsif::Vertex::Range {
                range: to_proto::range(&line_index, range),
                tag: None,
            });
            self.range_map.insert(FileRange { file_id, range }, range_id);
            let result_set_id = self.get_token_id(id);
            self.add_edge(lsif::Edge::Next(lsif::EdgeData {
                in_v: result_set_id.into(),
                out_v: range_id.into(),
            }));
            range_ids.push(range_id.into());
        }
        // A single `contains` edge attaches all token ranges to the document.
        self.add_edge(lsif::Edge::Contains(lsif::EdgeDataMultiIn {
            in_vs: range_ids,
            out_v: doc_id.into(),
        }));
    }
}
impl flags::Lsif {
    /// Loads the project found at `self.path` (resolved against the current
    /// directory), computes its static index and writes the LSIF dump to
    /// stdout. Progress messages go to stderr.
    ///
    /// # Errors
    ///
    /// Fails when the current directory cannot be determined, no single
    /// project manifest is discovered, or loading the workspace fails.
    pub fn run(self) -> Result<()> {
        eprintln!("Generating LSIF started...");
        let started_at = Instant::now();

        let path = AbsPathBuf::assert(env::current_dir()?.join(&self.path));
        let manifest = ProjectManifest::discover_single(&path)?;
        // Progress reports from workspace loading are ignored.
        let workspace = ProjectWorkspace::load(manifest, &CargoConfig::default(), &|_| ())?;

        let load_config = LoadCargoConfig {
            load_out_dirs_from_check: true,
            with_proc_macro: true,
            prefill_caches: false,
        };
        let (host, vfs, _proc_macro) = load_workspace(workspace, &load_config)?;
        let db = host.raw_database();
        let analysis = host.analysis();

        let index = StaticIndex::compute(db, &analysis);

        let mut manager = LsifManager::new(&analysis, db, &vfs);
        manager.add_vertex(lsif::Vertex::MetaData(lsif::MetaData {
            version: String::from("0.5.0"),
            project_root: lsp_types::Url::from_file_path(path).unwrap(),
            position_encoding: lsif::Encoding::Utf16,
            tool_info: None,
        }));
        // Files go first so that their range vertices exist before the
        // per-token results refer to them.
        index.files.into_iter().for_each(|file| manager.add_file(file));
        index.tokens.iter().for_each(|(id, token)| manager.add_token(id, token));

        eprintln!("Generating LSIF finished in {:?}", started_at.elapsed());
        Ok(())
    }
}