feat: add relationships to ELF package discovery (#2715)

This PR adds DependencyOf relationships for ELF packages discovered by the binary cataloger. Each discovered file.Executable carries an ImportedLibraries list ([]string) that syft reads from the binary itself. By mapping these imported libraries back to the package collection, syft can create relationships showing which packages are dependencies of other packages using only metadata read from the ELF executable.

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Signed-off-by: Brian Ebarb <ebarb.brian@users.noreply.github.com>
Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Brian Ebarb 2024-05-09 12:53:59 -05:00 committed by GitHub
parent 74b01a1c38
commit 4194a2cd34
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1298 additions and 7 deletions

View file

@ -27,7 +27,7 @@ runs:
using: "composite"
steps:
# note: go mod and build is automatically cached on default with v4+
- uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe #v4.1.0
- uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 #v5.0.1
if: inputs.go-version != ''
with:
go-version: ${{ inputs.go-version }}

View file

@ -60,6 +60,7 @@ output:
uniq-by-line: false
run:
timeout: 10m
tests: false
# do not enable...
# - deadcode # The owner seems to have abandoned the linter. Replaced by "unused".

View file

@ -0,0 +1,65 @@
package integration
import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/require"
"testing"
)
// TestBinaryElfRelationships catalogs the "elf-test-fixtures" image and checks that every
// expected dependency-of edge between the named packages is present in the resulting SBOM.
func TestBinaryElfRelationships(t *testing.T) {
	// dependency name --> names of the packages it is a "dependency of"
	wantDependents := map[string][]string{
		"glibc":             {"libhello_world.so", "syfttestfixture"},
		"libstdc++":         {"syfttestfixture"},
		"libhello_world.so": {"syfttestfixture"},
	}

	result, _ := catalogFixtureImage(t, "elf-test-fixtures", source.SquashedScope)

	// resolve every package name mentioned in the expected graph to a package ID
	idsByName := make(map[string]artifact.ID)
	indexByName := func(name string) {
		matches := result.Artifacts.Packages.PackagesByName(name)
		require.NotEmpty(t, matches, "expected package %q to be present in the SBOM", name)
		for _, match := range matches {
			idsByName[match.Name] = match.ID()
		}
	}
	for dependency, dependents := range wantDependents {
		indexByName(dependency)
		for _, dependent := range dependents {
			indexByName(dependent)
		}
	}

	// for each dependency, every expected dependent must appear as the "to" side of a relationship
	for dependency, dependents := range wantDependents {
		dependencyPkg := result.Artifacts.Packages.Package(idsByName[dependency])
		require.NotNil(t, dependencyPkg, "expected package %q to be present in the SBOM", dependency)

		found := result.RelationshipsForPackage(*dependencyPkg, artifact.DependencyOfRelationship)
		require.NotEmpty(t, found, "expected package %q to have relationships", dependency)

		targets := make(map[artifact.ID]struct{})
		for _, rel := range found {
			targets[rel.To.ID()] = struct{}{}
		}

		for _, dependent := range dependents {
			_, linked := targets[idsByName[dependent]]
			require.True(t, linked, "expected package %q to have a relationship to %q", dependency, dependent)
		}
	}
}

View file

@ -0,0 +1 @@
../../../../../../syft/pkg/cataloger/binary/test-fixtures/elf-test-fixtures

View file

@ -32,7 +32,6 @@ func catalogFixtureImageWithConfig(t *testing.T, fixtureImageName string, cfg *s
cfg.CatalogerSelection = cfg.CatalogerSelection.WithDefaults(pkgcataloging.ImageTag)
// get the fixture image tar file
imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName)
tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName)
// get the source to build an SBOM against

6
go.mod
View file

@ -84,6 +84,10 @@ require (
modernc.org/sqlite v1.29.9
)
require google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 // indirect
require github.com/magiconair/properties v1.8.7
require (
dario.cat/mergo v1.0.0 // indirect
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
@ -153,7 +157,6 @@ require (
github.com/kr/text v0.2.0 // indirect
github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/maruel/natural v1.1.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
@ -227,7 +230,6 @@ require (
golang.org/x/text v0.15.0 // indirect
golang.org/x/tools v0.19.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect
google.golang.org/grpc v1.59.0 // indirect
google.golang.org/protobuf v1.33.0 // indirect

View file

@ -0,0 +1,171 @@
package binary
import (
"path"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// NewDependencyRelationships derives dependency-of relationships from the shared libraries that
// cataloged binaries import. It indexes which packages own which shared libraries, seeds a
// relationship index with every relationship already held by the SBOM, and returns only the
// relationships that were newly added on top of those.
//
// TODO: consider library format (e.g. ELF, Mach-O, PE); for the meantime all binaries are assumed
// to be of a homogeneous format.
func NewDependencyRelationships(resolver file.Resolver, accessor sbomsync.Accessor) []artifact.Relationship {
	// steps 1 & 2: index all shared libraries and the packages that own them
	libraries := newShareLibIndex(resolver, accessor)

	// remember what the SBOM already contains so that no duplicate relationship is emitted
	var known *relationshipIndex
	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		known = newRelationshipIndex(s.Relationships...)
	})

	// step 3: craft relationships for each binary that represent its shared library dependencies
	return generateRelationships(resolver, accessor, libraries, known)
}
// generateRelationships walks every binary package in the SBOM and, for each of its
// primary-evidence locations that has a recorded executable, registers relationships for that
// executable's imported libraries. Existing dependency-of relationships are tracked first so
// they are not reported again. The result is the set of relationships newly added to relIndex.
func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex, relIndex *relationshipIndex) []artifact.Relationship {
	// mark all dependency-of relationships already in the SBOM as known
	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		for _, existing := range s.Relationships {
			if existing.Type == artifact.DependencyOfRelationship {
				relIndex.track(existing)
			}
		}
	})

	// discover relationships implied by shared library imports
	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		for _, binPkg := range s.Artifacts.Packages.Sorted(pkg.BinaryPkg) {
			for _, loc := range binPkg.Locations.ToSlice() {
				isPrimary := loc.Annotations[pkg.EvidenceAnnotationKey] == pkg.PrimaryEvidenceAnnotation
				if !isPrimary {
					continue
				}

				// only locations with executable metadata can tell us about imported libraries
				if exec, found := s.Artifacts.Executables[loc.Coordinates]; found {
					populateRelationships(exec, binPkg, resolver, relIndex, index)
				}
			}
		}
	})

	return relIndex.newRelationships()
}
// PackagesToRemove returns the IDs of binary packages that should be dropped from the SBOM because
// a higher-order package already represents them. The precedence applied is: OTHER > ELF > Binary.
// The IDs reported by getBinaryPackagesToDelete come first, followed by those reported by
// compareElfBinaryPackages; the returned slice is never nil.
func PackagesToRemove(resolver file.Resolver, accessor sbomsync.Accessor) []artifact.ID {
	removals := []artifact.ID{}
	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		supersededByOwners := getBinaryPackagesToDelete(resolver, s)
		supersededByELF := compareElfBinaryPackages(resolver, s)

		removals = append(removals, supersededByOwners...)
		removals = append(removals, supersededByELF...)
	})
	return removals
}
// compareElfBinaryPackages returns the IDs of binary packages that should be removed because an
// ELF package (a binary package whose metadata is pkg.ELFBinaryPackageNoteJSONPayload) already
// describes the same file.
//
// For every ELF package, each primary-evidence location is resolved through the resolver; any
// other binary-typed package found at a resolved path that does not itself carry ELF note
// metadata is marked for removal. Packages of non-binary types are never reported here.
// The returned slice is never nil and may contain the same ID more than once.
func compareElfBinaryPackages(resolver file.Resolver, s *sbom.SBOM) []artifact.ID {
	pkgsToDelete := make([]artifact.ID, 0)
	for _, elfPkg := range s.Artifacts.Packages.Sorted(pkg.BinaryPkg) {
		// only ELF packages may supersede other binary packages; without this guard a lone
		// non-ELF binary package would find itself at its own primary evidence path and be
		// marked for removal even though nothing of higher precedence represents it
		if _, isELF := elfPkg.Metadata.(pkg.ELFBinaryPackageNoteJSONPayload); !isELF {
			continue
		}

		for _, loc := range elfPkg.Locations.ToSlice() {
			if loc.Annotations[pkg.EvidenceAnnotationKey] != pkg.PrimaryEvidenceAnnotation {
				continue
			}

			locations, err := resolver.FilesByPath(loc.RealPath)
			if err != nil {
				// best effort: a path that cannot be resolved simply contributes nothing
				log.WithFields("error", err).Trace("unable to find path for owned file")
				continue
			}

			for _, ownedL := range locations {
				for _, pathPkg := range s.Artifacts.Packages.PackagesByPath(ownedL.RealPath) {
					// we only care about comparing binary packages to each other (not other types)
					if pathPkg.Type != pkg.BinaryPkg {
						continue
					}
					// ELF packages found at the same path are kept
					if _, ok := pathPkg.Metadata.(pkg.ELFBinaryPackageNoteJSONPayload); !ok {
						pkgsToDelete = append(pkgsToDelete, pathPkg.ID())
					}
				}
			}
		}
	}
	return pkgsToDelete
}
// getBinaryPackagesToDelete returns the IDs of binary packages located at a path that is owned
// by a non-binary package whose metadata implements pkg.FileOwner. The returned slice is never
// nil and may contain the same ID more than once.
func getBinaryPackagesToDelete(resolver file.Resolver, s *sbom.SBOM) []artifact.ID {
	doomed := []artifact.ID{}
	for candidate := range s.Artifacts.Packages.Enumerate() {
		// only non-binary packages that declare file ownership can supersede a binary package
		owner, ownsFiles := candidate.Metadata.(pkg.FileOwner)
		if candidate.Type == pkg.BinaryPkg || !ownsFiles {
			continue
		}

		resolved, err := resolver.FilesByPath(owner.OwnedFiles()...)
		if err != nil {
			log.WithFields("error", err).Trace("unable to find path for owned file")
			continue
		}

		for _, ownedLoc := range resolved {
			for _, overlapping := range s.Artifacts.Packages.PackagesByPath(ownedLoc.RealPath) {
				if overlapping.Type != pkg.BinaryPkg {
					continue
				}
				doomed = append(doomed, overlapping.ID())
			}
		}
	}
	return doomed
}
// populateRelationships adds to relIndex a dependency-of relationship targeting parentPkg for each
// file that matches a library imported by exec. Imports are matched by basename via a "**/<name>"
// glob against the resolver. For each matching file, every package that the shared library index
// reports as owning that basename becomes the "from" side; when no package owns it, the file's
// coordinates are used as the "from" side instead. Globs that fail are logged and skipped.
func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, relIndex *relationshipIndex, index *sharedLibraryIndex) {
	// dependOn registers "from is a dependency of parentPkg"; the index drops duplicates
	dependOn := func(from artifact.Identifiable) {
		relIndex.add(artifact.Relationship{
			From: from,
			To:   parentPkg,
			Type: artifact.DependencyOfRelationship,
		})
	}

	for _, imported := range exec.ImportedLibraries {
		// search by the basename of the library reference
		pattern := "**/" + path.Base(imported)
		matches, err := resolver.FilesByGlob(pattern)
		if err != nil {
			log.WithFields("lib", imported, "error", err).Trace("unable to resolve library basename")
			continue
		}

		for _, match := range matches {
			// is the matched file a library known to the index?
			owners := index.owningLibraryPackage(path.Base(match.RealPath))
			if owners.PackageCount() < 1 {
				dependOn(match.Coordinates)
			}
			for _, owner := range owners.Sorted() {
				dependOn(owner)
			}
		}
	}
}

View file

@ -0,0 +1,337 @@
package binary
import (
"path"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// TestPackagesToRemove checks which package IDs PackagesToRemove reports when an RPM package,
// an ELF-note binary package, and a binary package with BinarySignature metadata all reference
// the same glibc path.
func TestPackagesToRemove(t *testing.T) {
	// the single path shared by all fixture packages
	glibcCoordinate := file.NewCoordinates("/usr/lib64/libc.so.6", "")
	// RPM package that lists the glibc path among its file records
	glibCPackage := pkg.Package{
		Name: "glibc",
		Version: "2.28-236.el8_9.12",
		Locations: file.NewLocationSet(
			file.NewLocation(glibcCoordinate.RealPath),
		),
		Type: pkg.RpmPkg,
		Metadata: pkg.RpmDBEntry{
			Files: []pkg.RpmFileRecord{
				{
					Path: glibcCoordinate.RealPath,
				},
			},
		},
	}
	glibCPackage.SetID()
	// binary package with ELF note metadata; the glibc path is its primary evidence
	glibCBinaryELFPackage := pkg.Package{
		Name: "glibc",
		Version: "",
		Locations: file.NewLocationSet(
			file.NewLocation(glibcCoordinate.RealPath).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
		),
		Language: "",
		Type: pkg.BinaryPkg,
		Metadata: pkg.ELFBinaryPackageNoteJSONPayload{
			Type: "testfixture",
			Vendor: "syft",
			System: "syftsys",
			SourceRepo: "https://github.com/someone/somewhere.git",
			Commit: "5534c38d0ffef9a3f83154f0b7a7fb6ab0ab6dbb",
		},
	}
	glibCBinaryELFPackage.SetID()
	// binary package with BinarySignature (non-ELF-note) metadata; the glibc path is only supporting evidence
	glibCBinaryClassifierPackage := pkg.Package{
		Name: "glibc",
		Version: "",
		Locations: file.NewLocationSet(
			file.NewLocation(glibcCoordinate.RealPath).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation),
		),
		Language: "",
		Type: pkg.BinaryPkg,
		Metadata: pkg.BinarySignature{},
	}
	glibCBinaryClassifierPackage.SetID()
	tests := []struct {
		name string
		resolver file.Resolver
		accessor sbomsync.Accessor
		want []artifact.ID
	}{
		{
			// the RPM package owns the path, so the ELF binary package at that path is expected to be removed
			name: "remove packages that are overlapping rpm --> binary",
			resolver: file.NewMockResolverForPaths(glibcCoordinate.RealPath),
			accessor: newAccesor([]pkg.Package{glibCPackage, glibCBinaryELFPackage}, map[file.Coordinates]file.Executable{}, nil),
			want: []artifact.ID{glibCBinaryELFPackage.ID()},
		},
		{
			// nothing else represents the ELF package, so an empty (non-nil) result is expected
			name: "remove no packages when there is a single binary package",
			resolver: file.NewMockResolverForPaths(glibcCoordinate.RealPath),
			accessor: newAccesor([]pkg.Package{glibCBinaryELFPackage}, map[file.Coordinates]file.Executable{}, nil),
			want: []artifact.ID{},
		},
		{
			// the ELF package takes precedence over the non-ELF binary package at the same path
			name: "remove packages when there is a single binary package and a classifier package",
			resolver: file.NewMockResolverForPaths(glibcCoordinate.RealPath),
			accessor: newAccesor([]pkg.Package{glibCBinaryELFPackage, glibCBinaryClassifierPackage}, map[file.Coordinates]file.Executable{}, nil),
			want: []artifact.ID{glibCBinaryClassifierPackage.ID()},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			pkgsToDelete := PackagesToRemove(tt.resolver, tt.accessor)
			// cmp.Diff is called without options here, so the exact slice contents and order are compared
			if diff := cmp.Diff(tt.want, pkgsToDelete); diff != "" {
				t.Errorf("unexpected packages to delete (-want, +got): %s", diff)
			}
		})
	}
}
// TestNewDependencyRelationships exercises NewDependencyRelationships against small in-memory
// SBOMs built with newAccesor and mock resolvers, comparing the returned relationships with the
// expected ones via relationshipComparer.
func TestNewDependencyRelationships(t *testing.T) {
	// coordinates for the files under test
	glibcCoordinate := file.NewCoordinates("/usr/lib64/libc.so.6", "")
	secondGlibcCoordinate := file.NewCoordinates("/usr/local/lib64/libc.so.6", "")
	nestedLibCoordinate := file.NewCoordinates("/usr/local/bin/elftests/elfbinwithnestedlib/bin/elfbinwithnestedlib", "")
	parrallelLibCoordinate := file.NewCoordinates("/usr/local/bin/elftests/elfbinwithsisterlib/bin/elfwithparallellibbin1", "")
	// rpm package that was discovered in linked section of the ELF binary package
	glibCPackage := pkg.Package{
		Name: "glibc",
		Version: "2.28-236.el8_9.12",
		Locations: file.NewLocationSet(
			file.NewLocation(glibcCoordinate.RealPath),
			file.NewLocation("some/other/path"),
		),
		Type: pkg.RpmPkg,
		Metadata: pkg.RpmDBEntry{
			Files: []pkg.RpmFileRecord{
				{
					Path: glibcCoordinate.RealPath,
				},
				{
					Path: "some/other/path",
				},
			},
		},
	}
	glibCPackage.SetID()
	// second rpm package that could be discovered in linked section of the ELF binary package
	// (its library file has the same basename as the one above, under a different directory)
	glibCustomPackage := pkg.Package{
		Name: "glibc",
		Version: "2.28-236.el8_9.12",
		Locations: file.NewLocationSet(file.NewLocation(secondGlibcCoordinate.RealPath)),
		Type: pkg.RpmPkg,
		Metadata: pkg.RpmDBEntry{
			Files: []pkg.RpmFileRecord{
				{
					Path: secondGlibcCoordinate.RealPath,
				},
			},
		},
	}
	glibCustomPackage.SetID()
	// binary package that is an executable that can link against above rpm packages;
	// only nestedLibCoordinate is annotated as primary evidence
	syftTestFixturePackage := pkg.Package{
		Name: "syfttestfixture",
		Version: "0.01",
		PURL: "pkg:generic/syftsys/syfttestfixture@0.01",
		FoundBy: "",
		Locations: file.NewLocationSet(
			file.NewLocation(nestedLibCoordinate.RealPath).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
			file.NewLocation(parrallelLibCoordinate.RealPath).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation),
		),
		Language: "",
		Type: pkg.BinaryPkg,
		Metadata: pkg.ELFBinaryPackageNoteJSONPayload{
			Type: "testfixture",
			Vendor: "syft",
			System: "syftsys",
			SourceRepo: "https://github.com/someone/somewhere.git",
			Commit: "5534c38d0ffef9a3f83154f0b7a7fb6ab0ab6dbb",
		},
	}
	syftTestFixturePackage.SetID()
	// dummy executable representation of glibc (has exports, imports nothing)
	glibcExecutable := file.Executable{
		Format: "elf",
		HasExports: true,
		HasEntrypoint: true,
		ImportedLibraries: []string{},
	}
	// executable representation of the syftTestFixturePackage; imports the glibc library by basename
	syftTestFixtureExecutable := file.Executable{
		Format: "elf",
		HasExports: true,
		HasEntrypoint: true,
		ImportedLibraries: []string{
			path.Base(glibcCoordinate.RealPath),
		},
	}
	// second executable representation; in the tables below it is mapped to parrallelLibCoordinate,
	// which is only supporting evidence for syftTestFixturePackage
	syftTestFixtureExecutable2 := file.Executable{
		Format: "elf",
		HasExports: true,
		HasEntrypoint: true,
		ImportedLibraries: []string{
			// no relationship is expected for this import: none of the resolvers below contain a matching path
			"foo.so.6",
		},
	}
	tests := []struct {
		name string
		resolver file.Resolver
		coordinateIndex map[file.Coordinates]file.Executable
		packages []pkg.Package
		prexistingRelationships []artifact.Relationship
		want []artifact.Relationship
	}{
		{
			// with no packages and no executables, a nil resolver is passed and an empty result is expected
			name: "blank sbom and accessor returns empty relationships",
			resolver: nil,
			coordinateIndex: map[file.Coordinates]file.Executable{},
			packages: []pkg.Package{},
			want: make([]artifact.Relationship, 0),
		},
		{
			name: "given a package that imports glibc, expect a relationship between the two packages when the package is an executable",
			resolver: file.NewMockResolverForPaths(
				glibcCoordinate.RealPath,
				nestedLibCoordinate.RealPath,
				parrallelLibCoordinate.RealPath,
			),
			// path -> executable (above mock resolver needs to be able to resolve to paths in this map)
			coordinateIndex: map[file.Coordinates]file.Executable{
				glibcCoordinate: glibcExecutable,
				nestedLibCoordinate: syftTestFixtureExecutable,
				parrallelLibCoordinate: syftTestFixtureExecutable2,
			},
			packages: []pkg.Package{glibCPackage, syftTestFixturePackage},
			want: []artifact.Relationship{
				{
					From: glibCPackage,
					To: syftTestFixturePackage,
					Type: artifact.DependencyOfRelationship,
				},
			},
		},
		{
			// two RPM packages each own a libc.so.6 under a different directory, so both are expected as dependencies
			name: "given an executable maps to one base path represented by two RPM we make two relationships",
			resolver: file.NewMockResolverForPaths(
				glibcCoordinate.RealPath,
				secondGlibcCoordinate.RealPath,
				nestedLibCoordinate.RealPath,
				parrallelLibCoordinate.RealPath,
			),
			coordinateIndex: map[file.Coordinates]file.Executable{
				glibcCoordinate: glibcExecutable,
				secondGlibcCoordinate: glibcExecutable,
				nestedLibCoordinate: syftTestFixtureExecutable,
				parrallelLibCoordinate: syftTestFixtureExecutable2,
			},
			packages: []pkg.Package{glibCPackage, glibCustomPackage, syftTestFixturePackage},
			want: []artifact.Relationship{
				{
					From: glibCPackage,
					To: syftTestFixturePackage,
					Type: artifact.DependencyOfRelationship,
				},
				{
					From: glibCustomPackage,
					To: syftTestFixturePackage,
					Type: artifact.DependencyOfRelationship,
				},
			},
		},
		{
			// the glibc -> syfttestfixture relationship is pre-seeded in the SBOM, so it must not be reported again
			name: "given some dependency relationships already exist, expect no duplicate relationships to be created",
			resolver: file.NewMockResolverForPaths(
				glibcCoordinate.RealPath,
				nestedLibCoordinate.RealPath,
				parrallelLibCoordinate.RealPath,
			),
			coordinateIndex: map[file.Coordinates]file.Executable{
				glibcCoordinate: glibcExecutable,
				nestedLibCoordinate: syftTestFixtureExecutable,
				parrallelLibCoordinate: syftTestFixtureExecutable2,
			},
			packages: []pkg.Package{glibCPackage, glibCustomPackage, syftTestFixturePackage},
			prexistingRelationships: []artifact.Relationship{
				{
					From: glibCPackage,
					To: syftTestFixturePackage,
					Type: artifact.DependencyOfRelationship,
				},
			},
			want: []artifact.Relationship{},
		},
		{
			// the resolver is created with no paths at all
			name: "given a package that imports a library that is not tracked by the resolver, expect no relationships to be created",
			resolver: file.NewMockResolverForPaths(),
			coordinateIndex: map[file.Coordinates]file.Executable{
				glibcCoordinate: glibcExecutable,
				nestedLibCoordinate: syftTestFixtureExecutable,
				parrallelLibCoordinate: syftTestFixtureExecutable2,
			},
			packages: []pkg.Package{glibCPackage, syftTestFixturePackage},
			want: []artifact.Relationship{},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			accessor := newAccesor(tt.packages, tt.coordinateIndex, tt.prexistingRelationships)
			// given a resolver that knows about the paths of the packages and executables,
			// and given an SBOM accessor that knows about the packages and executables,
			// we should be able to create a set of relationships between the packages and executables
			relationships := NewDependencyRelationships(tt.resolver, accessor)
			if diff := relationshipComparer(tt.want, relationships); diff != "" {
				t.Errorf("unexpected relationships (-want, +got): %s", diff)
			}
		})
	}
}
// relationshipComparer returns the cmp.Diff of two relationship slices, ignoring unexported
// fields of pkg.Package, artifact.Relationship, file.LocationSet and pkg.LicenseSet.
// An empty string means no difference was reported.
func relationshipComparer(x, y []artifact.Relationship) string {
	ignoreUnexported := cmpopts.IgnoreUnexported(
		pkg.Package{},
		artifact.Relationship{},
		file.LocationSet{},
		pkg.LicenseSet{},
	)
	return cmp.Diff(x, y, ignoreUnexported)
}
// newAccesor builds an SBOM accessor for tests: the SBOM is populated with pkgs, its executable
// index is set to coordinateIndex, and its relationships are replaced by prexistingRelationships
// when that slice is non-nil.
func newAccesor(pkgs []pkg.Package, coordinateIndex map[file.Coordinates]file.Executable, prexistingRelationships []artifact.Relationship) sbomsync.Accessor {
	target := sbom.SBOM{
		Artifacts: sbom.Artifacts{Packages: pkg.NewCollection()},
	}

	b := sbomsync.NewBuilder(&target)
	b.AddPackages(pkgs...)

	// the builder is asserted to the Accessor interface for direct SBOM access
	acc := b.(sbomsync.Accessor)
	acc.WriteToSBOM(func(s *sbom.SBOM) {
		s.Artifacts.Executables = coordinateIndex
		if prexistingRelationships == nil {
			return
		}
		s.Relationships = prexistingRelationships
	})
	return acc
}

View file

@ -0,0 +1,59 @@
package binary
import (
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
)
// relationshipIndex records which relationship types are already known between pairs of
// artifacts and collects the relationships that were newly added through add.
type relationshipIndex struct {
	// typesByFromTo holds, per from-ID and then per to-ID, the set of relationship types (as strings) seen so far
	typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set
	// additional accumulates the relationships accepted by add, in the order they were added
	additional []artifact.Relationship
}
// newRelationshipIndex creates an index in which every relationship in existing is already
// tracked. Tracked relationships are not part of the "additional" list, which starts out
// empty but non-nil.
func newRelationshipIndex(existing ...artifact.Relationship) *relationshipIndex {
	idx := new(relationshipIndex)
	idx.typesByFromTo = map[artifact.ID]map[artifact.ID]*strset.Set{}
	idx.additional = []artifact.Relationship{}

	for n := range existing {
		idx.track(existing[n])
	}
	return idx
}
// track marks the given relationship as known to the index, which is what prevents duplicate
// relationships from being added later. It reports true when the (from, to, type) combination
// had not been seen before, false otherwise.
func (i *relationshipIndex) track(r artifact.Relationship) bool {
	from, to := r.From.ID(), r.To.ID()

	// lazily create the nested containers for this from/to pair
	byTo, ok := i.typesByFromTo[from]
	if !ok {
		byTo = make(map[artifact.ID]*strset.Set)
		i.typesByFromTo[from] = byTo
	}
	types, ok := byTo[to]
	if !ok {
		types = strset.New()
		byTo[to] = types
	}

	relType := string(r.Type)
	isNew := !types.Has(relType)
	types.Add(relType)
	return isNew
}
// add tracks the relationship and, when it was not already known, appends it to the list of new
// relationships. It reports true for a new relationship and false for a duplicate.
// nolint:unparam
func (i *relationshipIndex) add(r artifact.Relationship) bool {
	if !i.track(r) {
		return false
	}
	i.additional = append(i.additional, r)
	return true
}
// newRelationships returns the relationships accumulated through add (the index's "additional"
// slice). The slice is returned as-is, not copied.
func (i *relationshipIndex) newRelationships() []artifact.Relationship {
	return i.additional
}

View file

@ -0,0 +1,128 @@
package binary
import (
"reflect"
"testing"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
)
// Test_newRelationshipIndex verifies the internal state produced by newRelationshipIndex, both
// without any relationships and with one pre-existing relationship.
func Test_newRelationshipIndex(t *testing.T) {
	src := fakeIdentifiable{id: "from"}
	dst := fakeIdentifiable{id: "to"}

	cases := []struct {
		name  string
		given []artifact.Relationship
		want  *relationshipIndex
	}{
		{
			name: "newRelationshipIndex returns an empty index with no existing relationships",
			want: &relationshipIndex{
				typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set),
				additional:    make([]artifact.Relationship, 0),
			},
		},
		{
			name: "newRelationshipIndex returns an index which tracks existing relationships",
			given: []artifact.Relationship{
				{From: src, To: dst, Type: artifact.EvidentByRelationship},
			},
			want: &relationshipIndex{
				typesByFromTo: map[artifact.ID]map[artifact.ID]*strset.Set{
					"from": {"to": strset.New("evident-by")},
				},
				additional: make([]artifact.Relationship, 0),
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := newRelationshipIndex(tc.given...)
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("newRelationshipIndex() = %v, want %v", got, tc.want)
			}
		})
	}
}
func Test_relationshipIndex_track(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship}
tests := []struct {
name string
existing []artifact.Relationship
given artifact.Relationship
want bool
}{
{
name: "track returns true for a new relationship",
existing: []artifact.Relationship{},
given: relationship,
want: true,
},
{
name: "track returns false for an existing relationship",
existing: []artifact.Relationship{relationship},
given: relationship,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
i := newRelationshipIndex(tt.existing...)
if got := i.track(tt.given); got != tt.want {
t.Errorf("track() = %v, want %v", got, tt.want)
}
})
}
}
func Test_relationshipIndex_add(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship}
tests := []struct {
name string
existing []artifact.Relationship
given artifact.Relationship
want bool
}{
{
name: "add returns true for a new relationship",
existing: []artifact.Relationship{},
given: relationship,
want: true,
},
{
name: "add returns false for an existing relationship",
existing: []artifact.Relationship{relationship},
given: relationship,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
i := newRelationshipIndex(tt.existing...)
if got := i.add(tt.given); got != tt.want {
t.Errorf("add() = %v, want %v", got, tt.want)
}
})
}
}
// fakeIdentifiable is a test stand-in whose ID is a fixed string; the tests in this file use it
// as the From/To value of artifact.Relationship.
type fakeIdentifiable struct {
	// id is the value returned (converted to artifact.ID) by ID
	id string
}
// ID returns the configured id converted to an artifact.ID.
func (f fakeIdentifiable) ID() artifact.ID {
	return artifact.ID(f.id)
}

View file

@ -0,0 +1,131 @@
package binary
import (
"path"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// sharedLibraryIndex relates library basenames to the coordinates that provide them and to the
// packages that claim ownership of those coordinates.
type sharedLibraryIndex struct {
	// libLocationsByBasename maps a file basename to the coordinates of executables with exports that have that basename
	libLocationsByBasename map[string]file.CoordinateSet
	// allLibLocations is the set of every coordinate of an executable with exports
	allLibLocations file.CoordinateSet
	// packagesByLibraryPath maps a library coordinate to the packages that claim ownership of it
	packagesByLibraryPath map[file.Coordinates]*pkg.Collection
}
// newShareLibIndex creates a sharedLibraryIndex with initialized (empty) containers and then
// populates it from the SBOM behind accessor by calling build.
func newShareLibIndex(resolver file.Resolver, accessor sbomsync.Accessor) *sharedLibraryIndex {
	idx := new(sharedLibraryIndex)
	idx.libLocationsByBasename = map[string]file.CoordinateSet{}
	idx.allLibLocations = file.NewCoordinateSet()
	idx.packagesByLibraryPath = map[file.Coordinates]*pkg.Collection{}

	idx.build(resolver, accessor)
	return idx
}
// build (re)computes all three lookup structures of the index from the SBOM behind accessor,
// replacing whatever the index held before.
func (i *sharedLibraryIndex) build(resolver file.Resolver, accessor sbomsync.Accessor) {
	// 1. map out all locations that provide libraries (indexed by the basename)
	byBasename, everyLibLocation := locationsThatProvideLibraries(accessor)
	i.libLocationsByBasename = byBasename
	i.allLibLocations = everyLibLocation

	// 2. for each library path, find all packages that claim ownership of the library
	i.packagesByLibraryPath = packagesWithLibraryOwnership(resolver, accessor, everyLibLocation)
}
// owningLibraryPackage returns a new collection holding every package that owns a library
// location with the given basename. The collection is empty when the basename is unknown or
// no package owns any of its locations.
func (i *sharedLibraryIndex) owningLibraryPackage(libraryBasename string) *pkg.Collection {
	owners := pkg.NewCollection()

	coords, known := i.libLocationsByBasename[libraryBasename]
	if !known {
		return owners
	}

	for _, coord := range coords.ToSlice() {
		pkgs, owned := i.packagesByLibraryPath[coord]
		if !owned {
			continue
		}
		owners.Add(pkgs.Sorted()...)
	}
	return owners
}
// locationsThatProvideLibraries scans the SBOM's executables and returns (1) the coordinates of
// every executable that has exports, grouped by the basename of its real path, and (2) the flat
// set of all those coordinates.
func locationsThatProvideLibraries(accessor sbomsync.Accessor) (map[string]file.CoordinateSet, file.CoordinateSet) {
	byBasename := map[string]file.CoordinateSet{}
	everything := file.NewCoordinateSet()

	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		// PROBLEM: this does not consider all symlinks to real paths that are libraries
		for coord, exec := range s.Artifacts.Executables {
			if exec.HasExports {
				name := path.Base(coord.RealPath)

				// the set is a map value, so update a copy and store it back
				forName := byBasename[name]
				forName.Add(coord)
				byBasename[name] = forName

				everything.Add(coord)
			}
		}
	})

	return byBasename, everything
}
// packagesWithLibraryOwnership maps each library coordinate to the packages that claim to own it.
// Binary packages claim the paths of their own locations; any other package claims the files
// reported by its metadata when that metadata implements pkg.FileOwner, and is skipped otherwise.
func packagesWithLibraryOwnership(resolver file.Resolver, accessor sbomsync.Accessor, allLibLocations file.CoordinateSet) map[file.Coordinates]*pkg.Collection {
	owners := map[file.Coordinates]*pkg.Collection{}

	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		for _, p := range s.Artifacts.Packages.Sorted() {
			var claimed []string
			if p.Type != pkg.BinaryPkg {
				declared, isOwner := p.Metadata.(pkg.FileOwner)
				if !isOwner {
					continue
				}
				claimed = declared.OwnedFiles()
			} else {
				for _, l := range p.Locations.ToSlice() {
					claimed = append(claimed, l.Path())
				}
			}

			owners = populatePackagesByLibraryPath(resolver, allLibLocations, owners, p, claimed)
		}
	})

	return owners
}
// populatePackagesByLibraryPath resolves each of the package's owned file paths and, for every
// resolved location that is a known library location, records p as an owner of that location in
// packagesByLibraryPath. Paths that cannot be resolved are logged and skipped. The (mutated)
// input map is returned.
func populatePackagesByLibraryPath(
	resolver file.Resolver,
	allLibLocations file.CoordinateSet,
	packagesByLibraryPath map[file.Coordinates]*pkg.Collection,
	p pkg.Package,
	ownedFilePaths []string,
) map[file.Coordinates]*pkg.Collection {
	for _, ownedPath := range ownedFilePaths {
		resolved, err := resolver.FilesByPath(ownedPath)
		if err != nil {
			log.WithFields("error", err, "path", ownedPath).Trace("unable to find path for owned file")
			continue
		}

		for _, loc := range resolved {
			coords := loc.Coordinates

			// only locations that are libraries are of interest
			if !allLibLocations.Contains(coords) {
				continue
			}

			// lazily create the owner collection for this library, then register the package
			libOwners, seen := packagesByLibraryPath[coords]
			if !seen {
				libOwners = pkg.NewCollection()
				packagesByLibraryPath[coords] = libOwners
			}
			libOwners.Add(p)
		}
	}
	return packagesByLibraryPath
}

View file

@ -0,0 +1,105 @@
package binary
import (
"path"
"testing"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
// Test_newShareLibIndex verifies that newShareLibIndex returns a non-nil index for an empty SBOM.
func Test_newShareLibIndex(t *testing.T) {
	cases := []struct {
		name                    string
		resolver                file.Resolver
		coordinateIndex         map[file.Coordinates]file.Executable
		packages                []pkg.Package
		prexistingRelationships []artifact.Relationship
	}{
		{
			name:                    "constructor",
			resolver:                file.NewMockResolverForPaths(),
			coordinateIndex:         map[file.Coordinates]file.Executable{},
			packages:                []pkg.Package{},
			prexistingRelationships: []artifact.Relationship{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			acc := newAccesor(tc.packages, tc.coordinateIndex, tc.prexistingRelationships)
			idx := newShareLibIndex(tc.resolver, acc)
			if idx == nil {
				t.Errorf("newShareLibIndex() = %v, want non-nil", idx)
			}
		})
	}
}
// Test_sharedLibraryIndex_build verifies that, after building the index from an SBOM holding an
// RPM package that lists a libc path among its files, looking up the libc basename yields at
// least one owning package.
func Test_sharedLibraryIndex_build(t *testing.T) {
	// two library files with the same basename in different directories
	glibcCoordinate := file.NewCoordinates("/usr/lib64/libc.so.6", "")
	secondGlibcCoordinate := file.NewCoordinates("/usr/local/lib64/libc.so.6", "")
	// executable metadata shared by both coordinates; HasExports is set so they count as libraries
	glibcExecutable := file.Executable{
		Format: "elf",
		HasExports: true,
		HasEntrypoint: true,
		ImportedLibraries: []string{
			path.Base(glibcCoordinate.RealPath),
			path.Base(secondGlibcCoordinate.RealPath),
		},
	}
	// RPM package whose file records include the first libc path
	glibCPackage := pkg.Package{
		Name: "glibc",
		Version: "2.28-236.el8_9.12",
		Locations: file.NewLocationSet(
			file.NewLocation(glibcCoordinate.RealPath),
			file.NewLocation("some/other/path"),
		),
		Type: pkg.RpmPkg,
		Metadata: pkg.RpmDBEntry{
			Files: []pkg.RpmFileRecord{
				{
					Path: glibcCoordinate.RealPath,
				},
				{
					Path: "some/other/path",
				},
			},
		},
	}
	tests := []struct {
		name string
		resolver file.Resolver
		coordinateIndex map[file.Coordinates]file.Executable
		packages []pkg.Package
		prexistingRelationships []artifact.Relationship
	}{
		{
			name: "build with locations and packages",
			resolver: file.NewMockResolverForPaths([]string{
				glibcCoordinate.RealPath,
				secondGlibcCoordinate.RealPath,
			}...),
			coordinateIndex: map[file.Coordinates]file.Executable{
				glibcCoordinate: glibcExecutable,
				secondGlibcCoordinate: glibcExecutable,
			},
			packages: []pkg.Package{
				glibCPackage,
			},
			prexistingRelationships: []artifact.Relationship{},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			accessor := newAccesor(tt.packages, tt.coordinateIndex, tt.prexistingRelationships)
			sharedLibraryIndex := newShareLibIndex(tt.resolver, accessor)
			// newShareLibIndex already builds the index; build is invoked again explicitly as the method under test
			sharedLibraryIndex.build(tt.resolver, accessor)
			pkgs := sharedLibraryIndex.owningLibraryPackage(path.Base(glibcCoordinate.RealPath))
			if pkgs.PackageCount() < 1 {
				t.Errorf("owningLibraryPackage() = %v, want non-empty", pkgs)
			}
		})
	}
}

View file

@ -0,0 +1 @@
../../../../syft/pkg/cataloger/binary/test-fixtures/elf-test-fixtures

View file

@ -1,15 +1,21 @@
package relationship
import (
"github.com/anchore/syft/internal/relationship/binary"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/sbom"
)
func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
func Finalize(resolver file.Resolver, builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
accessor := builder.(sbomsync.Accessor)
// remove ELF packages and Binary packages that are already
// represented by a source package (e.g. a package that is evident by some package manager)
builder.DeletePackages(binary.PackagesToRemove(resolver, accessor)...)
// add relationships showing packages that are evident by a file which is owned by another package (package-to-package)
if cfg.PackageFileOwnershipOverlap {
byFileOwnershipOverlapWorker(accessor)
@ -21,6 +27,12 @@ func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src
excludeBinariesByFileOwnershipOverlap(accessor)
}
// add the new relationships for executables to the SBOM
newBinaryRelationships := binary.NewDependencyRelationships(resolver, accessor)
accessor.WriteToSBOM(func(s *sbom.SBOM) {
s.Relationships = append(s.Relationships, newBinaryRelationships...)
})
builder.AddRelationships(newBinaryRelationships...)
// add source "contains package" relationship (source-to-package)
var sourceRelationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
@ -33,5 +45,6 @@ func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
evidentByRelationships = evidentBy(s.Artifacts.Packages)
})
builder.AddRelationships(evidentByRelationships...)
}

View file

@ -20,6 +20,8 @@ type Builder interface {
AddPackages(...pkg.Package)
DeletePackages(...artifact.ID)
// edges
AddRelationships(...artifact.Relationship)
@ -78,6 +80,34 @@ func (b sbomBuilder) AddPackages(p ...pkg.Package) {
b.onWriteEvent()
}
// DeletePackages removes the packages with the given IDs from the SBOM's
// package collection and drops every relationship that has one of those
// packages on either end. b.lock is held for the whole operation and
// onWriteEvent is invoked once at the end, even when no IDs are given.
func (b sbomBuilder) DeletePackages(ids ...artifact.ID) {
	b.lock.Lock()
	defer b.lock.Unlock()

	deleted := make(map[artifact.ID]struct{}, len(ids))
	for _, id := range ids {
		b.sbom.Artifacts.Packages.Delete(id)
		deleted[id] = struct{}{}
	}

	// remove any relationships that reference the deleted packages
	var relationships []artifact.Relationship
	for _, rel := range b.sbom.Relationships {
		// a nil edge cannot reference a deleted package; guard each side so a
		// malformed relationship does not cause a nil dereference on ID()
		if rel.From != nil {
			if _, ok := deleted[rel.From.ID()]; ok {
				continue
			}
		}
		if rel.To != nil {
			if _, ok := deleted[rel.To.ID()]; ok {
				continue
			}
		}

		// only keep relationships that don't reference the deleted packages
		relationships = append(relationships, rel)
	}

	b.sbom.Relationships = relationships
	b.onWriteEvent()
}
func (b sbomBuilder) AddRelationships(relationship ...artifact.Relationship) {
b.lock.Lock()
defer b.lock.Unlock()

View file

@ -0,0 +1,98 @@
package sbomsync
import (
"testing"
"github.com/magiconair/properties/assert"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// TestNewBuilder checks that a package added through a builder created by
// NewBuilder is visible when the underlying SBOM is read via the Accessor.
func TestNewBuilder(t *testing.T) {
	type testCase struct {
		name string
		sbom sbom.SBOM
	}

	cases := []testCase{
		{
			name: "TestNewBuilder with empty sbom",
			sbom: sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(),
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			b := NewBuilder(&tc.sbom)
			b.AddPackages(pkg.Package{})

			// the builder is read back through its Accessor side; the
			// unchecked type assertion panics if it does not implement it
			reader := b.(Accessor)
			reader.ReadFromSBOM(func(s *sbom.SBOM) {
				got := s.Artifacts.Packages.PackageCount()
				assert.Equal(t, got, 1, "expected 1 package in sbom")
			})
		})
	}
}
// Test_sbomBuilder_DeletePackages checks that DeletePackages removes the
// requested package from the SBOM and drops the relationships that reference
// it, while leaving the other package in place.
func Test_sbomBuilder_DeletePackages(t *testing.T) {
	// the package to delete; its ID is set up front so it can be handed to DeletePackages
	doomed := pkg.Package{
		Name:    "test",
		Version: "1.0.0",
		Type:    pkg.DebPkg,
	}
	doomed.SetID()

	// the package that is expected to remain after the deletion
	survivor := pkg.Package{
		Name:    "keepMe",
		Version: "1.0.0",
		Type:    pkg.DebPkg,
	}

	// a single relationship whose two ends are both the package being deleted
	seededRelationships := []artifact.Relationship{
		{
			From: doomed,
			To:   doomed,
			Type: artifact.DependencyOfRelationship,
		},
	}

	type testCase struct {
		name string
		sbom sbom.SBOM
	}

	cases := []testCase{
		{
			name: "Test_sbomBuilder_DeletePackages deletes a given package",
			sbom: sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(),
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			b := NewBuilder(&tc.sbom)
			b.AddPackages(doomed, survivor)

			// seed the relationships directly on the SBOM before deleting
			access := b.(Accessor)
			access.WriteToSBOM(func(s *sbom.SBOM) {
				s.Relationships = seededRelationships
			})

			b.DeletePackages(doomed.ID())

			access.ReadFromSBOM(func(s *sbom.SBOM) {
				// deleted target package
				remainingPackages := s.Artifacts.Packages.PackageCount()
				assert.Equal(t, remainingPackages, 1, "expected 1 packages in sbom")

				// deleted relationships that reference the deleted package
				remainingRelationships := len(s.Relationships)
				assert.Equal(t, remainingRelationships, 0, "expected 0 relationships in sbom")
			})
		})
	}
}

View file

@ -22,8 +22,9 @@ func (s sourceIdentifierAdapter) ID() artifact.ID {
}
func NewRelationshipsTask(cfg cataloging.RelationshipsConfig, src source.Description) Task {
fn := func(_ context.Context, _ file.Resolver, builder sbomsync.Builder) error {
fn := func(_ context.Context, resolver file.Resolver, builder sbomsync.Builder) error {
relationship.Finalize(
resolver,
builder,
cfg,
&sourceIdentifierAdapter{desc: src})

View file

@ -13,6 +13,13 @@ type Coordinates struct {
FileSystemID string `json:"layerID,omitempty" cyclonedx:"layerID"` // An ID representing the filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank.
}
// NewCoordinates returns a Coordinates value whose RealPath is realPath and
// whose FileSystemID is fsID.
func NewCoordinates(realPath, fsID string) Coordinates {
	var c Coordinates
	c.RealPath = realPath
	c.FileSystemID = fsID
	return c
}
func (c Coordinates) ID() artifact.ID {
f, err := artifact.IDByHash(c)
if err != nil {

View file

@ -0,0 +1,135 @@
## Summary
This image illustrates a few examples of how ELF executables can be assembled and illustrated in an SBOM.
### Example 1: elf-test-fixtures/elfbinwithsisterlib
This example builds two binaries with srcs found in elfsrc1 and elfsrc2.
- 3 separate libs (two share the same name, one has a different name), all in different locations, but all with the same output when running:
```
objdump -s -j .note.package /usr/local/bin/elftests/elfbinwithnestedlib/bin/lib/libhello_world.so
/usr/local/bin/elftests/elfbinwithnestedlib/bin/lib/libhello_world.so: file format elf64-littleaarch64
Contents of section .note.package:
0000 7b227479 7065223a 20227465 73746669 {"type": "testfi
0010 78747572 65222c22 6c696365 6e736522 xture","license"
0020 3a224d49 54222c22 636f6d6d 6974223a :"MIT","commit":
0030 22353533 34633338 64306666 65663961 "5534c38d0ffef9a
0040 33663833 31353466 30623761 37666236 3f83154f0b7a7fb6
0050 61623061 62366462 62222c22 736f7572 ab0ab6dbb","sour
0060 63655265 706f223a 22687474 70733a2f ceRepo":"https:/
0070 2f676974 6875622e 636f6d2f 736f6d65 /github.com/some
0080 6f6e652f 736f6d65 77686572 652e6769 one/somewhere.gi
0090 74222c22 76656e64 6f72223a 20227379 t","vendor": "sy
00a0 6674222c 22737973 74656d22 3a202273 ft","system": "s
00b0 79667473 7973222c 226e616d 65223a20 yftsys","name":
00c0 226c6962 68656c6c 6f5f776f 726c642e "libhello_world.
00d0 736f222c 22766572 73696f6e 223a2022 so","version": "
00e0 302e3031 222c2270 75726c22 3a202270 0.01","purl": "p
00f0 6b673a67 656e6572 69632f73 79667473 kg:generic/syfts
0100 79732f73 79667474 65737466 69787475 ys/syfttestfixtu
0110 72654030 2e303122 2c226370 65223a20 re@0.01","cpe":
0120 22637065 3a2f6f3a 73796674 3a737966 "cpe:/o:syft:syf
0130 74737973 5f746573 74666978 74757265 tsys_testfixture
0140 5f737966 74746573 74666978 74757265 _syfttestfixture
0150 3a302e30 31227d0a :0.01"}.
```
### Binaries
```
/usr/local/bin/elftests/elfbinwithnestedlib/bin/elfbinwithnestedlib
/usr/local/bin/elftests/elfbinwithsisterlib/bin/elfwithparallellibbin2
/usr/local/bin/elftests/elfbinwithsisterlib/bin/elfwithparallellibbin1
```
#### Libraries
```
/usr/local/bin/elftests/elfbinwithnestedlib/bin/lib/libhello_world.so
/usr/local/bin/elftests/elfbinwithsisterlib/lib/libhello_world.so
/usr/local/bin/elftests/elfbinwithsisterlib/lib/libhello_world2.so
```
#### Binaries related to Libraries
The resulting SBOM should show the following relationships:
```
elfbinwithnestedlib -> libhello_world.so
elfwithparallellibbin2 -> libhello_world.so
elfwithparallellibbin1 -> libhello_world2.so
```
#### Desired State
We want to drop the package-to-file relationships and instead express package-to-package relationships.
Single relationship
ElfPackage `libhello_world.so` -> ElfPackage `syfttestfixture` library
Also a relationship between the binaries and the RPM packages that are transitive dependencies coming from the library
#### Actual state
```mermaid
flowchart
nested(.../bin/elfbinwithnestedlib)
parallel1(.../bin/elfwithparallellibbin1)
parallel2(.../bin/elfwithparallellibbin2)
nestedLib(.../nested/bin/lib/libhello_world.so)
sisterLib1(.../sister/lib/libhello_world.so)
sisterLib2(.../sister/lib/libhello_world2.so)
libc(libc.so.6)
libstdc(libstdc++.so.6)
nested --> |imports ../bin/lib/libhello_world.so| nestedLib
nested --> |imports libhello_world.so| sisterLib1
nested --> |imports libstdc++.so.6| libstdc
nested --> |imports libc.so.6| libc
nestedLib --> |imports libc.so.6| libc
sisterLib1 --> |imports libc.so.6| libc
parallel1 --> nestedLib
parallel1 --> sisterLib1
parallel1 --> libstdc
parallel1 --> libc
parallel2 --> |imports ../lib/libhello_world2.so| sisterLib2
parallel2 --> |imports libhello_world2.so| sisterLib2
parallel2 --> libstdc
parallel2 --> libc
sisterLib2 --> libc
```
#### Desired relationships
```mermaid
flowchart LR
%% Data sync workflow...
subgraph logicalAppPackage [ELF Package - 'syfttestfixture']
nested(.../bin/elfbinwithnestedlib)
parallel1(.../bin/elfwithparallellibbin1)
parallel2(.../bin/elfwithparallellibbin2)
end
subgraph logicalLibPackage [ELF Package - 'libhello_world.so']
nestedLib(.../nested/bin/lib/libhello_world.so)
sisterLib1(.../sister/lib/libhello_world.so)
sisterLib2(.../sister/lib/libhello_world2.so)
end
logicalLibPackage --> |dependency-of| logicalAppPackage
%% RPM packages
libstdc(libstdc++) --> |dependency-of| logicalAppPackage
glibc(glibc) --> |dependency-of| logicalAppPackage
glibc(glibc) --> |dependency-of| logicalLibPackage
```

View file

@ -75,15 +75,22 @@ func (s SBOM) RelationshipsForPackage(p pkg.Package, rt ...artifact.Relationship
rt = artifact.AllRelationshipTypes()
}
pID := p.ID()
var relationships []artifact.Relationship
for _, relationship := range s.Relationships {
if relationship.From == nil || relationship.To == nil {
log.Debugf("relationship has nil edge, skipping: %#v", relationship)
continue
}
if relationship.From.ID() != p.ID() {
fromID := relationship.From.ID()
toID := relationship.To.ID()
hasPkgID := fromID == pID || toID == pID
if !hasPkgID {
continue
}
// check if the relationship is one we're searching for; rt is inclusive
if !slices.ContainsFunc(rt, func(r artifact.RelationshipType) bool { return relationship.Type == r }) {
continue