# naersk/lib.nix (Nix source, 246 lines, 8.4 KiB; captured from a web blame view)

{ lib, writeText, runCommandLocal, remarshal, formats }:
let
builtinz =
builtins // import ./builtins
{ inherit lib writeText remarshal runCommandLocal formats; };
in
# (blame timestamp: 2019-06-25 15:36:22 +00:00)
rec
{
# The list of _all_ crates (incl. transitive dependencies) with name,
# version and sha256 of the crate, as recorded in a parsed Cargo.lock.
#
# Two lock layouts are handled:
#   * the lock has a top-level `metadata` table: the checksum of every
#     package (and of every entry of its `dependencies`) is looked up
#     under the key built by `mkMetadataKey`; entries without a matching
#     key are skipped;
#   * the lock has no `metadata` but has `package`: the checksum is read
#     from each package's own `checksum` field; packages without that
#     field are skipped.
# A lock with neither attribute yields [].
#
# Example:
#   [ { name = "wabt"; version = "2.0.6"; sha256 = "..."; } ]
mkVersions = cargolock:
  if builtins.hasAttr "metadata" cargolock then
    # TODO: this should nub by <pkg-name>-<pkg-version>
    lib.concatMap
      (x:
        let
          mdk = mkMetadataKey x.name x.version;
        in
        lib.optional (builtins.hasAttr mdk cargolock.metadata)
          {
            inherit (x) version name;
            # Reuse the key computed above instead of rebuilding it.
            sha256 = cargolock.metadata.${mdk};
          }
        ++ lib.concatMap (parseDependency cargolock) (x.dependencies or [])
      )
      cargolock.package
  else if builtins.hasAttr "package" cargolock then
    map
      (p: {
        inherit (p) name version;
        sha256 = p.checksum;
      })
      (builtins.filter (builtins.hasAttr "checksum") cargolock.package)
  else [];
# Resolves one dependency string of the form
# "lib-name lib-ver (registry+...)" to a singleton list
# [ { name = "lib-name"; version = "lib-ver"; sha256 = "..."; } ]
# iff the lock's `metadata` table holds a checksum for it; otherwise
# the result is [].
parseDependency = cargolock: str:
  let
    parts = lib.splitString " " str;
    name = lib.elemAt parts 0;
    version = lib.elemAt parts 1;
    key = mkMetadataKey name version;
  in
  if builtins.hasAttr key cargolock.metadata
  then [ { inherit name version; sha256 = cargolock.metadata.${key}; } ]
  else [];
# Builds the key under which a crate's sha256 is stored in the
# `metadata` table of a cargo lock. The registry is hard-coded to the
# crates.io index; no robustness guarantee.
mkMetadataKey = name: version:
  let
    registry = "registry+https://github.com/rust-lang/crates.io-index";
  in
  "checksum " + name + " " + version + " (" + registry + ")";
# Gets all git dependencies in Cargo.lock as a list.
#
# Both regular packages and `patch.unused` entries whose `source` starts
# with "git+" are considered. Every element carries the fields parsed
# from the lock entry plus a lookup `key` and the fetched `checkout`:
# [
#   {
#     name = "rand";
#     url = "https://github.com/...";
#     revision = "<text after '#' in the source>";
#     # `branch`, `tag`, `rev`: only present when the source URL has the
#     # matching "?branch=", "?tag=" or "?rev=" part
#     key = <rev, else tag, else branch, else revision>;
#     checkout = "/nix/store/checkout"
#   }
# ]
#
# Arguments:
#   cargolock:     parsed Cargo.lock (attrset)
#   gitAllRefs:    pass `allRefs = true` to fetchGit (needs Nix >= 2.4)
#   gitSubmodules: pass `submodules = true` to fetchGit
findGitDependencies =
  { cargolock, gitAllRefs, gitSubmodules }:
  let
    # A lock entry is a git dependency iff its source starts with "git+".
    query = p: (lib.substring 0 4 (p.source or "")) == "git+";

    # The text after the last '#' of the source string.
    extractRevision = source: lib.last (lib.splitString "#" source);

    # The value following `part` (e.g. "?rev=") in the source, with the
    # '#...' suffix dropped; null when `part` does not occur.
    extractPart = part: source:
      if lib.hasInfix part source
      then lib.last (lib.splitString part (lib.head (lib.splitString "#" source)))
      else null;

    # The source without its "git+" prefix, query string and fragment.
    extractRepoUrl = source:
      let
        splitted = lib.head (lib.splitString "?" source);
        split = lib.substring 4 (lib.stringLength splitted) splitted;
      in
      lib.head (lib.splitString "#" split);

    parseLock = lock:
      let
        source = lock.source;
        rev = extractPart "?rev=" source;
        tag = extractPart "?tag=" source;
        branch = extractPart "?branch=" source;
      in
      {
        inherit (lock) name;
        revision = extractRevision source;
        url = extractRepoUrl source;
      }
      // (lib.optionalAttrs (! isNull branch) { inherit branch; })
      // (lib.optionalAttrs (! isNull tag) { inherit tag; })
      // (lib.optionalAttrs (! isNull rev) { inherit rev; });

    # `package` may be absent from the lock (mkVersions tolerates that
    # too), in which case there is nothing to fetch.
    usedPackageLocks =
      builtins.map parseLock (lib.filter query (cargolock.package or []));

    unusedPackageLocks =
      builtins.map parseLock (lib.filter query ((cargolock.patch or {}).unused or []));

    packageLocks = usedPackageLocks ++ unusedPackageLocks;

    mkFetch = lock: {
      key = lock.rev or lock.tag or lock.branch or lock.revision
        or (throw "No 'rev', 'tag' or 'branch' available to specify key, nor a git revision was found in Cargo.lock");
      checkout = builtins.fetchGit ({
        url = lock.url;
        rev = lock.revision;
      } // lib.optionalAttrs (lock ? branch) {
        ref = "refs/heads/${lock.branch}";
      } // lib.optionalAttrs (lock ? tag) {
        # Listed after `branch`, so a tag wins if both are present.
        ref = "refs/tags/${lock.tag}";
      } // lib.optionalAttrs ((lib.versionAtLeast builtins.nixVersion "2.4") && (gitAllRefs || lock ? rev)) {
        allRefs = true;
      } // lib.optionalAttrs gitSubmodules {
        submodules = true;
      });
    } // lock;
  in
  builtins.map mkFetch packageLocks;
# A very minimal 'src' which makes cargo happy nonetheless.
#
# The output contains the Cargo.lock, an optional `.cargo/config`, and
# for every entry of `cargotomls` a directory holding a stripped-down
# Cargo.toml, a `#![no_std]` `src/lib.rs` and an empty `build.rs`.
# Afterwards the paths listed in `copySources` are copied over from
# `copySourcesFrom`.
dummySrc =
  { cargoconfig # path, or null when there is no cargo config to install
  , cargotomls # list of { name, toml }; `name` is the directory relative to the root
  , cargolock # attrset
  , copySources # list of paths that should be copied to the output
  , copySourcesFrom # path from which to copy ${copySources}
  }:
  let
    cargolock' = builtinz.writeTOML "Cargo.lock" cargolock;

    # Since we pretend everything is a lib, we remove any mentions of
    # binaries. `package.build` is dropped as well so that cargo always
    # falls back to the `./build.rs` created below.
    fixupCargoToml = cargotoml:
      removeAttrs cargotoml [ "bin" "example" "lib" "test" "bench" "default-run" ]
      // lib.optionalAttrs (builtins.hasAttr "package" cargotoml) {
        package = removeAttrs cargotoml.package [ "default-run" "build" ];
      };

    # "<member dir>:<store path of the fixed-up Cargo.toml>" tuples; they
    # are handed to the builder as one space-separated environment
    # variable and split again in the script below.
    cargotomlss = map
      ({ name, toml }:
        "${name}:${builtinz.writeTOML "Cargo.toml" (fixupCargoToml toml)}")
      cargotomls;
  in
  runCommandLocal "dummy-src"
    { inherit copySources copySourcesFrom cargotomlss; }
    ''
      mkdir -p $out/.cargo
      ${lib.optionalString (! isNull cargoconfig) "cp ${cargoconfig} $out/.cargo/config"}
      cp ${cargolock'} $out/Cargo.lock

      # Intentionally unquoted: the list is split on whitespace.
      for tuple in $cargotomlss; do
        member="''${tuple%%:*}"
        cargotoml="''${tuple##*:}"

        final_dir="$out/$member"
        mkdir -p "$final_dir"
        final_path="$final_dir/Cargo.toml"
        cp "$cargotoml" "$final_path"

        pushd "$final_dir" > /dev/null
        mkdir -p src

        # Avoid accidentally pulling `std` for no-std crates.
        echo '#![no_std]' >src/lib.rs

        # pretend there's a `build.rs`, otherwise cargo doesn't build
        # the `[build-dependencies]`. Custom locations of build scripts
        # aren't an issue because we strip the `build` field in
        # `fixupCargoToml`; so cargo always thinks there's a build
        # script which is `./build.rs`.
        echo 'fn main() {}' > build.rs
        popd > /dev/null
      done

      # Copy all the "patched" sources which are used by dependencies.
      # This needs to be done after the creation of the dummy to make
      # sure the dummy source files do not trample on the patch
      # dependencies.
      for p in $copySources; do
        echo "Copying patched source $p to $out..."
        mkdir -p "$out/$p"
        chmod -R +w "$out/$p"
        echo copying "$copySourcesFrom/$p"/ to "$out/$p"
        cp --no-preserve=mode -R "$copySourcesFrom/$p"/* "$out/$p"
      done
    '';
# Indexes the packages of a cargo lock first by name, then by version:
#   { <name> = { <version> = <package entry>; }; }
mkPackages = cargolock:
  lib.foldl'
    (acc: pkg: lib.recursiveUpdate acc { ${pkg.name} = { ${pkg.version} = pkg; }; })
    {}
    cargolock.package;
# The direct dependencies of package `name` at `version`, as a list of
# { name; version; } attrsets; [] when the package lists none.
#
# Dependency entries come in two shapes:
#   * "<name> <version> ..." is parsed by `parseDependency'`;
#   * a bare "<name>" (newer lock files omit the version when only one
#     version of that package is locked) is resolved to the single
#     version recorded for that name in the lock.
directDependencies = cargolock: name: version:
  let
    packages = mkPackages cargolock;
    package = packages.${name}.${version};
    resolve = str:
      if lib.hasInfix " " str
      then parseDependency' str
      else {
        name = str;
        # Bare names are only written when unambiguous, so the first
        # (and only) locked version is the one meant.
        version = lib.head (builtins.attrNames packages.${str});
      };
  in
  map resolve (package.dependencies or []);
# The transitive closure of package `name` at `version` over the
# `dependencies` recorded in the lock, the starting package included.
# Returns a list of { key = "<name>-<version>"; package = <lock entry>; }
# as produced by `builtins.genericClosure`.
#
# Dependency entries of the form "<name> <version> ..." are parsed by
# `parseDependency'`; a bare "<name>" (written by newer lock files when
# only one version of that package is locked) is resolved to the single
# version recorded for that name.
transitiveDeps = cargolock: name: version:
  let
    packages = mkPackages cargolock;
    wrap = p:
      {
        key = "${p.name}-${p.version}";
        package = p;
      };
    resolve = str:
      if lib.hasInfix " " str
      then parseDependency' str
      else {
        name = str;
        # Bare names are only written when unambiguous.
        version = lib.head (builtins.attrNames packages.${str});
      };
    lookup = dep: packages.${dep.name}.${dep.version};
  in
  builtins.genericClosure
    {
      startSet = [ (wrap packages.${name}.${version}) ];
      operator = item:
        map (str: wrap (lookup (resolve str))) (item.package.dependencies or []);
    };
# Turns "<package> <version> ..." into
# { name = <package>; version = <version>; }.
#
# Only the first two space-separated fields are used. If the string has
# no version field, evaluating `version` throws a descriptive error
# (`name` can still be read).
parseDependency' = str:
  let
    components = lib.splitString " " str;
  in
  {
    name = lib.elemAt components 0;
    version =
      if builtins.length components > 1
      then lib.elemAt components 1
      else throw "parseDependency': dependency '${str}' does not specify a version";
  };
}