Merge pull request #18806 from LHolten/deduplicate-crate-graph

fix: Deduplicate crate graph
This commit is contained in:
Lukas Wirth 2025-01-03 14:56:14 +00:00 committed by GitHub
commit a440fcb698
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 19299 additions and 35 deletions

View file

@ -490,21 +490,25 @@ impl CrateGraph {
}
}
/// Sorts the dependency list of every crate stored in this graph by the
/// `crate_id` of each dependency.
pub fn sort_deps(&mut self) {
    for (_, data) in self.arena.iter_mut() {
        data.dependencies.sort_by_key(|dep| dep.crate_id);
    }
}
/// Extends this crate graph by adding a complete second crate
/// graph and adjusts the ids in the [`ProcMacroPaths`] accordingly.
///
/// This will deduplicate the crates of the graph where possible.
/// Furthermore, dependencies are sorted by crate id to make deduplication easier.
///
/// Returns a map mapping `other`'s IDs to the new IDs in `self`.
pub fn extend(
&mut self,
mut other: CrateGraph,
proc_macros: &mut ProcMacroPaths,
) -> FxHashMap<CrateId, CrateId> {
// Sorting here is a bit pointless because the input is likely already sorted.
// However, the overhead is small and it makes the `extend` method harder to misuse.
self.arena
.iter_mut()
.for_each(|(_, data)| data.dependencies.sort_by_key(|dep| dep.crate_id));
let m = self.len();
let topo = other.crates_in_topological_order();
let mut id_map: FxHashMap<CrateId, CrateId> = FxHashMap::default();
for topo in topo {
@ -513,7 +517,8 @@ impl CrateGraph {
crate_data.dependencies.iter_mut().for_each(|dep| dep.crate_id = id_map[&dep.crate_id]);
crate_data.dependencies.sort_by_key(|dep| dep.crate_id);
let new_id = self.arena.alloc(crate_data.clone());
let find = self.arena.iter().take(m).find_map(|(k, v)| (v == crate_data).then_some(k));
let new_id = find.unwrap_or_else(|| self.arena.alloc(crate_data.clone()));
id_map.insert(topo, new_id);
}

View file

@ -4,7 +4,7 @@ use paths::Utf8Path;
use rustc_hash::FxHashMap;
use toolchain::Tool;
use crate::{utf8_stdout, CargoWorkspace, ManifestPath, PackageData, Sysroot, TargetKind};
use crate::{utf8_stdout, ManifestPath, PackageData, Sysroot, TargetKind};
/// Recreates the compile-time environment variables that Cargo sets.
///
@ -51,23 +51,13 @@ pub(crate) fn inject_cargo_env(env: &mut Env) {
env.set("CARGO", Tool::Cargo.path().to_string());
}
pub(crate) fn inject_rustc_tool_env(
env: &mut Env,
cargo: &CargoWorkspace,
cargo_name: &str,
kind: TargetKind,
) {
pub(crate) fn inject_rustc_tool_env(env: &mut Env, cargo_name: &str, kind: TargetKind) {
_ = kind;
// FIXME
// if kind.is_executable() {
// env.set("CARGO_BIN_NAME", cargo_name);
// }
env.set("CARGO_CRATE_NAME", cargo_name.replace('-', "_"));
// NOTE: Technically we should set this for all crates, but that will worsen the deduplication
// logic, so for now just keeping it to proc-macros ought to be fine.
if kind.is_proc_macro() {
env.set("CARGO_RUSTC_CURRENT_DIR", cargo.manifest_path().parent().to_string());
}
}
pub(crate) fn cargo_config_env(

View file

@ -18,18 +18,25 @@ use crate::{
};
fn load_cargo(file: &str) -> (CrateGraph, ProcMacroPaths) {
load_cargo_with_overrides(file, CfgOverrides::default())
let project_workspace = load_workspace_from_metadata(file);
to_crate_graph(project_workspace, &mut Default::default())
}
/// Builds a crate graph from the cargo metadata fixture `file`, replacing the
/// workspace's `cfg_overrides` with the ones supplied by the caller.
///
/// A fresh, empty file map is used for the conversion.
fn load_cargo_with_overrides(
    file: &str,
    cfg_overrides: CfgOverrides,
) -> (CrateGraph, ProcMacroPaths) {
    // Start from the workspace described by the fixture and swap in the
    // requested overrides; every other field is kept as loaded.
    let mut workspace = load_workspace_from_metadata(file);
    workspace.cfg_overrides = cfg_overrides;

    let mut file_map = Default::default();
    to_crate_graph(workspace, &mut file_map)
}
fn load_workspace_from_metadata(file: &str) -> ProjectWorkspace {
let meta: Metadata = get_test_json_file(file);
let manifest_path =
ManifestPath::try_from(AbsPathBuf::try_from(meta.workspace_root.clone()).unwrap()).unwrap();
let cargo_workspace = CargoWorkspace::new(meta, manifest_path, Default::default());
let project_workspace = ProjectWorkspace {
ProjectWorkspace {
kind: ProjectWorkspaceKind::Cargo {
cargo: cargo_workspace,
build_scripts: WorkspaceBuildScripts::default(),
@ -37,13 +44,12 @@ fn load_cargo_with_overrides(
error: None,
set_test: true,
},
cfg_overrides,
cfg_overrides: Default::default(),
sysroot: Sysroot::empty(),
rustc_cfg: Vec::new(),
toolchain: None,
target_layout: Err("target_data_layout not loaded".into()),
};
to_crate_graph(project_workspace)
}
}
fn load_rust_project(file: &str) -> (CrateGraph, ProcMacroPaths) {
@ -58,7 +64,7 @@ fn load_rust_project(file: &str) -> (CrateGraph, ProcMacroPaths) {
target_layout: Err(Arc::from("test has no data layout")),
cfg_overrides: Default::default(),
};
to_crate_graph(project_workspace)
to_crate_graph(project_workspace, &mut Default::default())
}
fn get_test_json_file<T: DeserializeOwned>(file: &str) -> T {
@ -127,13 +133,15 @@ fn rooted_project_json(data: ProjectJsonData) -> ProjectJson {
ProjectJson::new(None, base, data)
}
fn to_crate_graph(project_workspace: ProjectWorkspace) -> (CrateGraph, ProcMacroPaths) {
fn to_crate_graph(
project_workspace: ProjectWorkspace,
file_map: &mut FxHashMap<AbsPathBuf, FileId>,
) -> (CrateGraph, ProcMacroPaths) {
project_workspace.to_crate_graph(
&mut {
let mut counter = 0;
move |_path| {
counter += 1;
Some(FileId::from_raw(counter))
|path| {
let len = file_map.len() + 1;
Some(*file_map.entry(path.to_path_buf()).or_insert(FileId::from_raw(len as u32)))
}
},
&Default::default(),
@ -221,6 +229,33 @@ fn rust_project_is_proc_macro_has_proc_macro_dep() {
crate_data.dependencies.iter().find(|&dep| dep.name.deref() == "proc_macro").unwrap();
}
/// Extending a crate graph with a clone of itself must leave both the graph's
/// iteration order and the proc-macro paths unchanged.
#[test]
fn crate_graph_dedup_identical() {
    let (mut graph, expected_proc_macros) = load_cargo("regex-metadata.json");
    let duplicate_graph = graph.clone();
    let mut duplicate_proc_macros = expected_proc_macros.clone();

    // `extend` consumes its argument, so hand it another clone and keep
    // `duplicate_graph` around for the comparison below.
    graph.extend(duplicate_graph.clone(), &mut duplicate_proc_macros);

    assert!(graph.iter().eq(duplicate_graph.iter()));
    assert_eq!(expected_proc_macros, duplicate_proc_macros);
}
/// Merging the regex crate graph into the ripgrep crate graph must collapse
/// the crates they have in common: 71 + 50 crates go in, 108 come out.
#[test]
fn crate_graph_dedup() {
    // Both graphs are built against the same file map so that the same path
    // resolves to the same `FileId` in each of them.
    let mut file_map = Default::default();

    let (mut crate_graph, _proc_macros) =
        to_crate_graph(load_workspace_from_metadata("ripgrep-metadata.json"), &mut file_map);
    assert_eq!(crate_graph.iter().count(), 71);

    let (regex_crate_graph, mut regex_proc_macros) =
        to_crate_graph(load_workspace_from_metadata("regex-metadata.json"), &mut file_map);
    assert_eq!(regex_crate_graph.iter().count(), 50);

    crate_graph.extend(regex_crate_graph, &mut regex_proc_macros);
    assert_eq!(crate_graph.iter().count(), 108);
}
#[test]
fn smoke_test_real_sysroot_cargo() {
let file_map = &mut FxHashMap::<AbsPathBuf, FileId>::default();

View file

@ -1362,12 +1362,10 @@ fn add_target_crate_root(
let mut env = cargo.env().clone();
inject_cargo_package_env(&mut env, pkg);
inject_cargo_env(&mut env);
inject_rustc_tool_env(&mut env, cargo, cargo_name, kind);
inject_rustc_tool_env(&mut env, cargo_name, kind);
if let Some(envs) = build_data.map(|(it, _)| &it.envs) {
for (k, v) in envs {
env.set(k, v.clone());
}
env.extend_from_other(envs);
}
let crate_id = crate_graph.add_crate_root(
file_id,

View file

@ -479,7 +479,7 @@
},
11: CrateData {
root_file_id: FileId(
12,
11,
),
edition: Edition2018,
version: None,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff