add support for RPM DB package relationships (#2872)

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-05-14 13:48:19 -04:00 committed by GitHub
parent e767bcff4b
commit 7ad7627d5d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 2929 additions and 3 deletions

View file

@ -3,5 +3,5 @@ package internal
const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.8"
JSONSchemaVersion = "16.0.9"
)

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "anchore.io/schema/syft/json/16.0.8/document",
"$id": "anchore.io/schema/syft/json/16.0.9/document",
"$ref": "#/$defs/Document",
"$defs": {
"AlpmDbEntry": {
@ -2104,6 +2104,18 @@
"modularityLabel": {
"type": "string"
},
"provides": {
"items": {
"type": "string"
},
"type": "array"
},
"requires": {
"items": {
"type": "string"
},
"type": "array"
},
"files": {
"items": {
"$ref": "#/$defs/RpmFileRecord"
@ -2160,6 +2172,18 @@
"modularityLabel": {
"type": "string"
},
"provides": {
"items": {
"type": "string"
},
"type": "array"
},
"requires": {
"items": {
"type": "string"
},
"type": "array"
},
"files": {
"items": {
"$ref": "#/$defs/RpmFileRecord"

View file

@ -7,8 +7,10 @@ import (
"database/sql"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
)
// NewDBCataloger returns a new RPM DB cataloger object.
@ -20,7 +22,23 @@ func NewDBCataloger() pkg.Cataloger {
return generic.NewCataloger("rpm-db-cataloger").
WithParserByGlobs(parseRpmDB, pkg.RpmDBGlob).
WithParserByGlobs(parseRpmManifest, pkg.RpmManifestGlob)
WithParserByGlobs(parseRpmManifest, pkg.RpmManifestGlob).
WithProcessors(dependency.Processor(dbEntryDependencySpecifier), denySelfReferences)
}
// denySelfReferences is a cataloger post-processing step that drops every dependency relationship whose
// source and target resolve to the same ID. Packages and the incoming error are passed through untouched.
//
// It can be common for dependency evidence to be self-referential (e.g. bash depends on bash), which is not
// useful for the dependency graph, thus these cases are removed. Relationships of any other type are kept,
// even when they are self-referential.
//
// NOTE: the filtering is done in place in a single pass; the returned slice reuses the backing array of rels,
// so callers must use the returned slice rather than the one they passed in.
func denySelfReferences(pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
	// rels[:0] keeps nil as nil and otherwise shares storage with rels; the write position never gets ahead
	// of the read position, so no unread element is overwritten
	kept := rels[:0]
	for _, rel := range rels {
		if rel.Type == artifact.DependencyOfRelationship && rel.From.ID() == rel.To.ID() {
			continue
		}
		kept = append(kept, rel)
	}
	return pkgs, kept, err
}
// NewArchiveCataloger returns a new RPM file cataloger object.

View file

@ -1,11 +1,191 @@
package redhat
import (
"errors"
"testing"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
_ "modernc.org/sqlite"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
// Test_DBCataloger runs the RPM DB cataloger end-to-end against the "image-minimal" fixture image and
// asserts both the packages that are discovered (basesystem, bash, filesystem) and the dependency
// relationships that are derived from their provides/requires metadata.
func Test_DBCataloger(t *testing.T) {
// every expected package is found in the same sqlite RPM database, marked as primary evidence
dbLocation := file.NewLocation("/var/lib/rpm/rpmdb.sqlite")
locations := file.NewLocationSet(dbLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation))
basePkg := pkg.Package{
Name: "basesystem",
Version: "11-13.el9",
Type: pkg.RpmPkg,
Locations: locations,
Licenses: pkg.NewLicenseSet(pkg.NewLicenseFromLocations("Public Domain", dbLocation)),
FoundBy: "rpm-db-cataloger",
PURL: "pkg:rpm/basesystem@11-13.el9?arch=noarch&upstream=basesystem-11-13.el9.src.rpm",
Metadata: pkg.RpmDBEntry{
Name: "basesystem",
Version: "11",
Arch: "noarch",
Release: "13.el9",
SourceRpm: "basesystem-11-13.el9.src.rpm",
Size: 0,
Vendor: "Rocky Enterprise Software Foundation",
Provides: []string{"basesystem"},
Requires: []string{
"filesystem",
"rpmlib(CompressedFileNames)",
"rpmlib(FileDigests)",
"rpmlib(PayloadFilesHavePrefix)",
"rpmlib(PayloadIsZstd)",
"setup",
},
ModularityLabel: strRef(""),
},
}
// the ID must be set after the package is fully populated so it can be used in the expected relationships
basePkg.SetID()
bashPkg := pkg.Package{
Name: "bash",
Version: "5.1.8-6.el9_1",
Type: pkg.RpmPkg,
Locations: locations,
Licenses: pkg.NewLicenseSet(pkg.NewLicenseFromLocations("GPLv3+", dbLocation)),
FoundBy: "rpm-db-cataloger",
PURL: "pkg:rpm/bash@5.1.8-6.el9_1?arch=x86_64&upstream=bash-5.1.8-6.el9_1.src.rpm",
Metadata: pkg.RpmDBEntry{
Name: "bash",
Version: "5.1.8",
Arch: "x86_64",
Release: "6.el9_1",
SourceRpm: "bash-5.1.8-6.el9_1.src.rpm",
Size: 7738634,
ModularityLabel: strRef(""),
Vendor: "Rocky Enterprise Software Foundation",
Provides: []string{
"/bin/bash",
"/bin/sh",
"bash",
"bash(x86-64)",
"config(bash)",
},
Requires: []string{
"/usr/bin/sh",
"config(bash)",
"filesystem",
"libc.so.6()(64bit)",
"libc.so.6(GLIBC_2.11)(64bit)",
"libc.so.6(GLIBC_2.14)(64bit)",
"libc.so.6(GLIBC_2.15)(64bit)",
"libc.so.6(GLIBC_2.2.5)(64bit)",
"libc.so.6(GLIBC_2.25)(64bit)",
"libc.so.6(GLIBC_2.3)(64bit)",
"libc.so.6(GLIBC_2.3.4)(64bit)",
"libc.so.6(GLIBC_2.33)(64bit)",
"libc.so.6(GLIBC_2.34)(64bit)",
"libc.so.6(GLIBC_2.4)(64bit)",
"libc.so.6(GLIBC_2.8)(64bit)",
"libtinfo.so.6()(64bit)",
"rpmlib(BuiltinLuaScripts)",
"rpmlib(CompressedFileNames)",
"rpmlib(FileDigests)",
"rpmlib(PayloadFilesHavePrefix)",
"rpmlib(PayloadIsZstd)",
"rtld(GNU_HASH)",
},
},
}
bashPkg.SetID()
filesystemPkg := pkg.Package{
Name: "filesystem",
Version: "3.16-2.el9",
Type: pkg.RpmPkg,
Locations: locations,
Licenses: pkg.NewLicenseSet(pkg.NewLicenseFromLocations("Public Domain", dbLocation)),
FoundBy: "rpm-db-cataloger",
PURL: "pkg:rpm/filesystem@3.16-2.el9?arch=x86_64&upstream=filesystem-3.16-2.el9.src.rpm",
Metadata: pkg.RpmDBEntry{
Name: "filesystem",
Version: "3.16",
Arch: "x86_64",
Release: "2.el9",
SourceRpm: "filesystem-3.16-2.el9.src.rpm",
Size: 106,
ModularityLabel: strRef(""),
Vendor: "Rocky Enterprise Software Foundation",
Provides: []string{
"filesystem",
"filesystem(x86-64)",
"filesystem-afs",
},
Requires: []string{
"/bin/sh",
"rpmlib(BuiltinLuaScripts)",
"rpmlib(CompressedFileNames)",
"rpmlib(FileDigests)",
"rpmlib(PayloadFilesHavePrefix)",
"rpmlib(PayloadIsZstd)",
"setup",
},
},
}
filesystemPkg.SetID()
expectedPackages := []pkg.Package{basePkg, bashPkg, filesystemPkg}
// Note that you'll see a cycle:
// bash --(requires)--> filesystem
// filesystem --(requires)--> bash
//
// This is not a bug!
//
// [root@c1a4773e8a8d /]# dnf repoquery --requires --resolve filesystem
// bash-0:5.1.8-9.el9.aarch64
// setup-0:2.13.7-10.el9.noarch
//
// [root@c1a4773e8a8d /]# dnf repoquery --requires --resolve bash
// filesystem-0:3.16-2.el9.aarch64
// glibc-0:2.34-100.el9.aarch64
// ncurses-libs-0:6.2-10.20210508.el9.aarch64
expectedRelationships := []artifact.Relationship{
// though this is expressible in the RPM DB (package depends on itself), we do not allow for it in the SBOM
//{
// From: bashPkg,
// To: bashPkg,
// Type: artifact.DependencyOfRelationship,
//},
// filesystem requires "/bin/sh", which bash provides
{
From: bashPkg,
To: filesystemPkg,
Type: artifact.DependencyOfRelationship,
},
// basesystem requires "filesystem"
{
From: filesystemPkg,
To: basePkg,
Type: artifact.DependencyOfRelationship,
},
// bash requires "filesystem"
{
From: filesystemPkg,
To: bashPkg,
Type: artifact.DependencyOfRelationship,
},
}
pkgtest.NewCatalogTester().
WithImageResolver(t, "image-minimal").
IgnoreLocationLayer(). // this fixture can be rebuilt, thus the layer ID will change
WithCompareOptions(cmpopts.IgnoreFields(pkg.RpmDBEntry{}, "Files")). // this is rather long... and not the point of the test
Expects(expectedPackages, expectedRelationships).
TestCataloger(t, NewDBCataloger())
}
func Test_DBCataloger_Globs(t *testing.T) {
tests := []struct {
name string
@ -64,3 +244,93 @@ func Test_RPMFileCataloger_Globs(t *testing.T) {
})
}
}
// Test_denySelfReferences exercises the self-reference filter in isolation: relationships between distinct
// packages must survive, a package depending on itself must be dropped, and the package list and any
// incoming error must be handed back unchanged.
func Test_denySelfReferences(t *testing.T) {
	// newPkg builds a minimal package with its ID populated
	newPkg := func(name string) pkg.Package {
		p := pkg.Package{
			Name: name,
		}
		p.SetID()
		return p
	}

	// dependency builds a dependency-of relationship between the two given packages
	dependency := func(from, to pkg.Package) artifact.Relationship {
		return artifact.Relationship{
			From: from,
			To:   to,
			Type: artifact.DependencyOfRelationship,
		}
	}

	a, b, c := newPkg("a"), newPkg("b"), newPkg("c")
	allPkgs := []pkg.Package{a, b, c}

	type testCase struct {
		name              string
		pkgs              []pkg.Package
		rels              []artifact.Relationship
		err               error
		wantPkgs          int
		wantRelationships int
		wantErr           assert.ErrorAssertionFunc
	}

	cases := []testCase{
		{
			name:              "no self references",
			pkgs:              allPkgs,
			rels:              []artifact.Relationship{dependency(a, b)},
			wantPkgs:          3,
			wantRelationships: 1,
			wantErr:           assert.NoError,
		},
		{
			name: "remove self references",
			pkgs: allPkgs,
			rels: []artifact.Relationship{
				dependency(a, a),
				dependency(a, b),
			},
			wantPkgs:          3,
			wantRelationships: 1,
			wantErr:           assert.NoError,
		},
		{
			name:              "preserve errors",
			pkgs:              allPkgs,
			rels:              []artifact.Relationship{dependency(a, b)},
			err:               errors.New("stop me!"),
			wantPkgs:          3,
			wantRelationships: 1,
			wantErr:           assert.Error,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// cases that do not state an expectation default to "no error"
			checkErr := tc.wantErr
			if checkErr == nil {
				checkErr = assert.NoError
			}

			gotPkgs, gotRels, err := denySelfReferences(tc.pkgs, tc.rels, tc.err)

			checkErr(t, err)
			assert.Len(t, gotPkgs, tc.wantPkgs)
			assert.Len(t, gotRels, tc.wantRelationships)
		})
	}
}

View file

@ -0,0 +1,66 @@
package redhat
import (
"strings"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
)
var _ dependency.Specifier = dbEntryDependencySpecifier
// dbEntryDependencySpecifier derives the dependency specification (what a package provides and what it
// requires) from the RPM DB metadata attached to the given package.
//
// Provides is made up of, in order: the package name, every supported non-empty "provides" key, and the
// path of every file owned by the package. Requires holds every supported non-empty "requires" key and is
// left nil when there are none. Packages that do not carry pkg.RpmDBEntry metadata yield an empty
// specification.
func dbEntryDependencySpecifier(p pkg.Package) dependency.Specification {
	entry, isRpmDBEntry := p.Metadata.(pkg.RpmDBEntry)
	if !isRpmDBEntry {
		log.Tracef("cataloger failed to extract rpmdb metadata for package %+v", p.Name)
		return dependency.Specification{}
	}

	// usable reports whether a raw key can take part in dependency resolution: blank entries carry no
	// information and boolean expressions are not supported
	usable := func(k string) bool {
		return k != "" && isSupportedKey(k)
	}

	// a package always provides itself by name
	provides := []string{p.Name}
	for i := range entry.Provides {
		if k := entry.Provides[i]; usable(k) {
			provides = append(provides, k)
		}
	}

	// all owned files are also considered "provides" for the package
	for i := range entry.Files {
		provides = append(provides, entry.Files[i].Path)
	}

	var requires []string
	for i := range entry.Requires {
		if k := entry.Requires[i]; usable(k) {
			requires = append(requires, k)
		}
	}

	return dependency.Specification{
		Provides: provides,
		Requires: requires,
	}
}
// isSupportedKey reports whether a provides/requires key can be used for dependency resolution.
//
// A key whose first non-whitespace character is '(' is the start of a boolean (rich) dependency
// expression, which is not supported in syft at this time. Every other key, including the empty
// string, is reported as supported.
// See https://rpm-software-management.github.io/rpm/manual/boolean_dependencies.html for more details
//
// examples of unsupported keys:
//   - (rpmlib(PayloadIsZstd) <= 5.4.18-1)
//   - (glibc-gconv-extra(aarch-64) = 2.34-83.el9.12 if redhat-rpm-config)
//   - (java-headless or java-17-headless or java-11-headless or java-1.8.0-headless)
//   - (llvm if clang)
//   - (pyproject-rpm-macros = 1.9.0-1.el9 if pyproject-rpm-macros)
//   - (gcc >= 11 with gcc < 12)
func isSupportedKey(key string) bool {
	trimmed := strings.TrimSpace(key)
	if strings.HasPrefix(trimmed, "(") {
		return false
	}
	return true
}

View file

@ -0,0 +1,106 @@
package redhat
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
)
// Test_dbEntryDependencySpecifier checks how RPM DB metadata is translated into a dependency
// specification: the package name is always provided, boolean-expression keys are dropped, and empty
// entries are ignored.
func Test_dbEntryDependencySpecifier(t *testing.T) {
	// rpmPackage builds a package whose RPM DB metadata carries only the given dependency keys
	rpmPackage := func(name string, provides, requires []string) pkg.Package {
		return pkg.Package{
			Name: name,
			Metadata: pkg.RpmDBEntry{
				Provides: provides,
				Requires: requires,
			},
		}
	}

	type testCase struct {
		name string
		p    pkg.Package
		want dependency.Specification
	}

	cases := []testCase{
		{
			name: "keeps given values + package name",
			p:    rpmPackage("package-c", []string{"a-thing"}, []string{"b-thing"}),
			want: dependency.Specification{
				Provides: []string{"package-c", "a-thing"},
				Requires: []string{"b-thing"},
			},
		},
		{
			name: "strip unsupported keys",
			p: rpmPackage(
				"package-a",
				[]string{"libc.so.6(GLIBC_2.11)(64bit)"},
				[]string{"config(bash)", "(llvm if clang)"},
			),
			want: dependency.Specification{
				Provides: []string{"package-a", "libc.so.6(GLIBC_2.11)(64bit)"},
				Requires: []string{"config(bash)"},
			},
		},
		{
			name: "empty dependency data entries",
			p:    rpmPackage("package-a", []string{""}, []string{""}),
			want: dependency.Specification{
				Provides: []string{"package-a"},
				Requires: nil,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := dbEntryDependencySpecifier(tc.p)
			assert.Equal(t, tc.want, got)
		})
	}
}
// Test_isSupportedKey pins which dependency keys are accepted: plain names, paths and the empty string
// are supported, while anything that starts with '(' once surrounding whitespace is ignored is not.
func Test_isSupportedKey(t *testing.T) {
	type testCase struct {
		name string
		key  string
		want bool
	}

	cases := []testCase{
		{name: "paths allowed", key: "/usr/bin/sh", want: true},
		{name: "spaces stripped", key: " filesystem ", want: true},
		{name: "empty key", key: "", want: true},
		{
			name: "boolean expression",
			key:  "(pyproject-rpm-macros = 1.9.0-1.el9 if pyproject-rpm-macros)",
			want: false,
		},
		{name: "boolean expression with spaces stripped", key: " (llvm if clang)", want: false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := isSupportedKey(tc.key)
			assert.Equal(t, tc.want, got)
		})
	}
}

View file

@ -74,6 +74,8 @@ func parseRpmDB(_ context.Context, resolver file.Resolver, env *generic.Environm
Size: entry.Size,
ModularityLabel: &entry.Modularitylabel,
Files: extractRpmFileRecords(resolver, *entry),
Provides: entry.Provides,
Requires: entry.Requires,
}
p := newDBPackage(

View file

@ -110,6 +110,7 @@ func TestParseRpmDB(t *testing.T) {
Size: 12406784,
Vendor: "",
ModularityLabel: strRef(""),
Provides: []string{"dive"},
Files: []pkg.RpmFileRecord{},
},
},
@ -139,6 +140,7 @@ func TestParseRpmDB(t *testing.T) {
Size: 12406784,
Vendor: "",
ModularityLabel: strRef(""),
Provides: []string{"dive"},
Files: []pkg.RpmFileRecord{
{
Path: "/usr/local/bin/dive",

View file

@ -0,0 +1,5 @@
# Test fixture image: a Rocky Linux base with a pruning script copied in and run at build time.
# linux/amd64 (NOTE(review): presumably the platform the pinned digest below refers to - confirm)
FROM rockylinux:9.3.20231119@sha256:45cc42828cc5ceeffa3a9b4f6363fb582fac3ab91f77bf403daa067f8f049f96
# COPY is used instead of ADD: the script is a plain local file, so none of ADD's extra
# behavior (remote URL fetching, automatic archive extraction) is wanted here
COPY remove.sh /remove.sh
RUN /remove.sh

View file

@ -0,0 +1,29 @@
#!/bin/bash

# Prunes the installed RPM set down to a short allow-list: every package reported by
# `rpm -qa` whose name is not on the list is erased (without dependency checks).

ESSENTIAL_PACKAGES=(
  "basesystem"
  "filesystem"
  "bash"
)

# is_essential NAME
# Succeeds when NAME is exactly one of the entries in ESSENTIAL_PACKAGES.
is_essential() {
  local candidate
  for candidate in "${ESSENTIAL_PACKAGES[@]}"; do
    if [[ "$1" == "$candidate" ]]; then
      return 0
    fi
  done
  return 1
}

# collect the name of every installed package that is not on the allow-list
PACKAGES_TO_REMOVE=()
for package in $(rpm -qa --queryformat '%{NAME}\n'); do
  if is_essential "$package"; then
    echo "Skipping essential package: $package"
  else
    PACKAGES_TO_REMOVE+=("$package")
  fi
done

# erase everything in one rpm invocation; --nodeps is required since the packages being
# removed are depended upon by the ones that are kept
if [ ${#PACKAGES_TO_REMOVE[@]} -eq 0 ]; then
  echo "No non-essential packages to remove."
else
  echo "Removing non-essential packages..."
  rpm -e --nodeps "${PACKAGES_TO_REMOVE[@]}"
fi

# bash itself is on the allow-list and this shell is already running, so echo still works here :)
echo "Cleanup complete."

View file

@ -35,6 +35,8 @@ type RpmDBEntry struct {
Size int `json:"size" cyclonedx:"size"`
Vendor string `json:"vendor"`
ModularityLabel *string `json:"modularityLabel,omitempty"`
Provides []string `json:"provides,omitempty"`
Requires []string `json:"requires,omitempty"`
Files []RpmFileRecord `json:"files"`
}