[wip] prototype binary relationships

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-03-13 18:09:19 -04:00
parent 1b3e57c264
commit 78ad3d648f
5 changed files with 286 additions and 2 deletions

View file

@ -0,0 +1,97 @@
package binary
import (
"path"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// NewDependencyRelationships derives dependency-of relationships for binary packages from the shared libraries
// that their executables import. For every binary package location annotated as primary evidence, the imported
// library names of the matching executable are looked up (by basename) in an index of library-providing files:
//
//   - when packages claim ownership of a matching library, a package-to-package relationship is created
//     for each owning package;
//   - otherwise a file-to-package relationship is created for each library file with that basename.
//
// Dependency-of relationships already present in the SBOM are tracked up front so they are not reported again;
// only relationships that are new are returned. The SBOM itself is not modified here.
//
// TODO: consider library format (e.g. ELF, Mach-O, PE); for the meantime assume all binaries share one format.
func NewDependencyRelationships(resolver file.Resolver, accessor sbomsync.Accessor) []artifact.Relationship {
	// index every shared library and the packages that own it, so imports can be searched against it
	libIndex := newShareLibIndex(resolver, accessor)

	// seed the de-duplication index with the dependency-of relationships the SBOM already has
	rels := newRelationshipIndex()
	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		for _, existing := range s.Relationships {
			if existing.Type == artifact.DependencyOfRelationship {
				rels.track(existing)
			}
		}
	})

	// discover the relationships that still need to be added (package-to-package is preferred over
	// file-to-package whenever an owning package is known)
	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		for _, binPkg := range s.Artifacts.Packages.Sorted(pkg.BinaryPkg) {
			for _, loc := range binPkg.Locations.ToSlice() {
				if loc.Annotations[pkg.EvidenceAnnotationKey] != pkg.PrimaryEvidenceAnnotation {
					continue
				}

				// the executable record carries the list of libraries this binary imports
				executable, found := s.Artifacts.Executables[loc.Coordinates]
				if !found {
					continue
				}

				for _, imported := range executable.ImportedLibraries {
					// TODO: is this always a basename? technically no, it could be a path...
					name := path.Base(imported)

					// if more than one library matches this name, all of them are included as dependencies
					// since the LD_LIBRARY_PATH order is unknown
					// TODO: add configuration for LD_LIBRARY_PATH order?
					if owners := libIndex.owningLibraryPackage(name); owners != nil {
						// package-to-package: the library's owning package is a dependency of the binary package
						for _, owner := range owners.Sorted() {
							rels.add(artifact.Relationship{
								From: owner,
								To:   binPkg,
								Type: artifact.DependencyOfRelationship,
							})
						}
						continue
					}

					// no owning package is known: fall back to relating the library file itself to the package
					for _, coord := range libIndex.owningLibraryLocations(name).ToSlice() {
						rels.add(artifact.Relationship{
							From: coord,
							To:   binPkg,
							Type: artifact.DependencyOfRelationship,
						})
					}
				}
			}
		}
	})

	// so far this handles the first order dependencies from the binary package. Odds are that the OS package
	// manager will have already created package-to-package relationships between the library packages.
	return rels.newRelationships()
}

View file

@ -0,0 +1,58 @@
package binary
import (
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
)
// relationshipIndex remembers which relationships are already known (keyed by the IDs of both ends and the
// relationship type) and collects the relationships that were added on top of those.
type relationshipIndex struct {
// typesByFromTo maps the "from" artifact ID, then the "to" artifact ID, to the set of relationship
// types (stored as strings) seen between those two artifacts.
typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set
// additional holds the relationships appended via add, in the order they were added.
additional []artifact.Relationship
}
// newRelationshipIndex creates an index in which every relationship passed as existing is already tracked.
// The given relationships only seed the duplicate check; they are not reported by newRelationships.
func newRelationshipIndex(existing ...artifact.Relationship) *relationshipIndex {
	idx := &relationshipIndex{
		typesByFromTo: map[artifact.ID]map[artifact.ID]*strset.Set{},
		additional:    []artifact.Relationship{},
	}

	for n := range existing {
		idx.track(existing[n])
	}

	return idx
}
// track records this relationship as "exists" in the index (this is used to prevent duplicate relationships
// from being added). A relationship is identified by the IDs of its From and To artifacts plus its type.
// Returns true if the relationship is new to the index, false if it was already being tracked.
func (i *relationshipIndex) track(r artifact.Relationship) bool {
	fromID, toID := r.From.ID(), r.To.ID()

	// lazily create the nested lookup levels for this from/to pair
	typesByTo, ok := i.typesByFromTo[fromID]
	if !ok {
		typesByTo = make(map[artifact.ID]*strset.Set)
		i.typesByFromTo[fromID] = typesByTo
	}

	types, ok := typesByTo[toID]
	if !ok {
		types = strset.New()
		typesByTo[toID] = types
	}

	relType := string(r.Type)
	if types.Has(relType) {
		return false
	}

	types.Add(relType)
	return true
}

// add a new relationship to the index, returning true if the relationship is new to the index, false otherwise
// (thus is a duplicate). Only relationships that are new are kept for newRelationships.
func (i *relationshipIndex) add(r artifact.Relationship) bool {
	if !i.track(r) {
		return false
	}

	i.additional = append(i.additional, r)
	return true
}
// newRelationships returns the relationships collected through add, i.e. those that were not
// already tracked by the index at the time they were added.
func (i *relationshipIndex) newRelationships() []artifact.Relationship {
	added := i.additional
	return added
}

View file

@ -0,0 +1,117 @@
package binary
import (
"path"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// sharedLibraryIndex is a lookup of the files that provide libraries and the packages that claim
// ownership of those files.
type sharedLibraryIndex struct {
// libLocationsByBasename maps a file basename to the coordinates of every executable with exports
// that has that basename.
libLocationsByBasename map[string]file.CoordinateSet
// allLibLocations is the set of coordinates of all executables with exports.
allLibLocations file.CoordinateSet
// packagesByLibraryPath maps the coordinates of a library file to the packages whose file-owner
// metadata lists a path that resolves to that file.
packagesByLibraryPath map[file.Coordinates]*pkg.Collection
}
// newShareLibIndex allocates a shared library index and populates it from the SBOM behind the accessor,
// using the resolver to map package-owned paths onto file locations.
func newShareLibIndex(resolver file.Resolver, accessor sbomsync.Accessor) *sharedLibraryIndex {
	idx := new(sharedLibraryIndex)
	idx.libLocationsByBasename = make(map[string]file.CoordinateSet)
	idx.allLibLocations = file.NewCoordinateSet()
	idx.packagesByLibraryPath = make(map[file.Coordinates]*pkg.Collection)

	idx.build(resolver, accessor)

	return idx
}
// build (re)populates every field of the index: first the library locations are gathered, then the
// packages owning those locations are resolved.
func (i *sharedLibraryIndex) build(resolver file.Resolver, accessor sbomsync.Accessor) {
	// step 1: every location that provides a library, both grouped by basename and as a flat set
	byBasename, all := locationsThatProvideLibraries(accessor)
	i.libLocationsByBasename = byBasename
	i.allLibLocations = all

	// step 2: for each of those library locations, the packages that claim ownership of it
	i.packagesByLibraryPath = packagesWithLibraryOwnership(resolver, accessor, all)
}
// owningLibraryLocations returns the coordinates of all indexed library files with the given basename,
// or an empty set when no such library is known.
func (i *sharedLibraryIndex) owningLibraryLocations(libraryBasename string) file.CoordinateSet {
	locations, found := i.libLocationsByBasename[libraryBasename]
	if !found {
		return file.NewCoordinateSet()
	}
	return locations
}
// owningLibraryPackage returns the packages that claim ownership of any indexed library file with the given
// basename. When several library files share the basename, the owners of all of them are merged into one
// collection (rather than stopping at the first owned file), since it is unknown which copy would be loaded.
// Returns nil when no package owns a library with this basename.
func (i *sharedLibraryIndex) owningLibraryPackage(libraryBasename string) *pkg.Collection {
	locations, ok := i.libLocationsByBasename[libraryBasename]
	if !ok {
		return nil
	}

	var owners *pkg.Collection
	for _, coord := range locations.ToSlice() {
		pkgSet, ok := i.packagesByLibraryPath[coord]
		if !ok {
			continue
		}

		if owners == nil {
			// merge into a fresh collection so the per-path collections held by the index are never mutated
			owners = pkg.NewCollection()
		}

		for _, p := range pkgSet.Sorted() {
			owners.Add(p)
		}
	}

	return owners
}
// locationsThatProvideLibraries scans the executables recorded in the SBOM and keeps those that have exports.
// It returns the kept coordinates twice: grouped by the basename of their real path, and as one flat set.
func locationsThatProvideLibraries(accessor sbomsync.Accessor) (map[string]file.CoordinateSet, file.CoordinateSet) {
	var (
		byBasename = map[string]file.CoordinateSet{}
		all        = file.NewCoordinateSet()
	)

	collect := func(s *sbom.SBOM) {
		for coord, executable := range s.Artifacts.Executables {
			// only executables that export something are treated as library providers
			if executable.HasExports {
				name := path.Base(coord.RealPath)

				// the set is stored by value in the map, so write it back after adding to it
				group := byBasename[name]
				group.Add(coord)
				byBasename[name] = group

				all.Add(coord)
			}
		}
	}
	accessor.ReadFromSBOM(collect)

	return byBasename, all
}
// packagesWithLibraryOwnership finds, for each library location in allLibLocations, the packages that claim
// ownership of that file. Only packages whose metadata implements pkg.FileOwner are considered; each owned path
// is resolved through the resolver, and paths that fail to resolve are logged at trace level and skipped.
// Library locations that no package owns have no entry in the returned map.
func packagesWithLibraryOwnership(resolver file.Resolver, accessor sbomsync.Accessor, allLibLocations file.CoordinateSet) map[file.Coordinates]*pkg.Collection {
	ownersByCoord := make(map[file.Coordinates]*pkg.Collection)

	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
		for _, candidate := range s.Artifacts.Packages.Sorted() {
			owner, isOwner := candidate.Metadata.(pkg.FileOwner)
			if !isOwner {
				continue
			}

			for _, ownedPath := range owner.OwnedFiles() {
				resolved, err := resolver.FilesByPath(ownedPath)
				if err != nil {
					log.WithFields("error", err, "path", ownedPath).Trace("unable to find path for owned file")
					continue
				}

				for _, loc := range resolved {
					// only locations that are known libraries are of interest
					if allLibLocations.Contains(loc.Coordinates) {
						owners, seen := ownersByCoord[loc.Coordinates]
						if !seen {
							owners = pkg.NewCollection()
							ownersByCoord[loc.Coordinates] = owners
						}

						// this package owns a library file at this location
						owners.Add(candidate)
					}
				}
			}
		}
	})

	return ownersByCoord
}

View file

@ -1,15 +1,20 @@
package relationship
import (
"github.com/anchore/syft/internal/relationship/binary"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/sbom"
)
func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
func Finalize(resolver file.Resolver, builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
accessor := builder.(sbomsync.Accessor)
// remove ELF packages that are already represented by a non-ELF package
// TODO (also, how should we update the TUI to reflect that we removed packages?)
// add relationships showing packages that are evident by a file which is owned by another package (package-to-package)
if cfg.PackageFileOwnershipOverlap {
byFileOwnershipOverlapWorker(accessor)
@ -21,6 +26,12 @@ func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src
excludeBinariesByFileOwnershipOverlap(accessor)
}
// add the new relationships for executables to the SBOM
newBinaryRelationships := binary.NewDependencyRelationships(resolver, accessor)
accessor.WriteToSBOM(func(s *sbom.SBOM) {
s.Relationships = append(s.Relationships, newBinaryRelationships...)
})
// add source "contains package" relationship (source-to-package)
var sourceRelationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {

View file

@ -22,8 +22,9 @@ func (s sourceIdentifierAdapter) ID() artifact.ID {
}
func NewRelationshipsTask(cfg cataloging.RelationshipsConfig, src source.Description) Task {
fn := func(_ context.Context, _ file.Resolver, builder sbomsync.Builder) error {
fn := func(_ context.Context, resolver file.Resolver, builder sbomsync.Builder) error {
relationship.Finalize(
resolver,
builder,
cfg,
&sourceIdentifierAdapter{desc: src})