From 7ff37a031022812d6dfa530fade11275bc6ac2f2 Mon Sep 17 00:00:00 2001 From: William Murphy Date: Fri, 18 Aug 2023 15:43:42 -0400 Subject: [PATCH] feat: filter out packages owned by OS packages (#1387) For example, if the rpm "python3-rpm" is installed, it brings a python package called "rpm" with it, which is just python bindings to RPM. But this python package is part of "python3-rpm", and should not be matched against directly. Only apply this deduplication strategy on distros with a comprehensive enough vulnerability feed that we don't expect false negatives from it. Signed-off-by: Will Murphy --- grype/pkg/package.go | 65 ++++++++++++++++++++++++++++++--- grype/pkg/package_test.go | 58 +++++++++++++++++++++++------ grype/pkg/syft_provider.go | 2 +- grype/pkg/syft_sbom_provider.go | 3 +- test/integration/utils_test.go | 5 ++- 5 files changed, 111 insertions(+), 22 deletions(-) diff --git a/grype/pkg/package.go b/grype/pkg/package.go index b9fd50e0..5b65015a 100644 --- a/grype/pkg/package.go +++ b/grype/pkg/package.go @@ -10,6 +10,7 @@ import ( "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" cpes "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" ) @@ -101,7 +102,7 @@ func (p Package) String() string { return fmt.Sprintf("Pkg(type=%s, name=%s, version=%s, upstreams=%d)", p.Type, p.Name, p.Version, len(p.Upstreams)) } -func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.Relationship) *pkg.Collection { +func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.Relationship, distro *linux.Release) *pkg.Collection { byOverlap := map[artifact.ID]artifact.Relationship{} for _, r := range relationships { if r.Type == artifact.OwnershipByFileOverlapRelationship { @@ -110,12 +111,12 @@ func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.R } out := pkg.NewCollection() - + comprehensiveDistroFeed := distroFeedIsComprehensive(distro) for p := range catalog.Enumerate() { r, ok := byOverlap[p.ID()] if ok { from, ok := r.From.(pkg.Package) - if ok && excludePackage(p, from) { + if ok && excludePackage(comprehensiveDistroFeed, p, from) { continue } } @@ -125,7 +126,7 @@ func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.R return out } -func excludePackage(p pkg.Package, parent pkg.Package) bool { +func excludePackage(comprehensiveDistroFeed bool, p pkg.Package, parent pkg.Package) bool { // NOTE: we are not checking the name because we have mismatches like: // python 3.9.2 binary // python3.9 3.9.2-1 deb @@ -135,7 +136,15 @@ func excludePackage(p pkg.Package, parent pkg.Package) bool { return false } - // filter out only binary pkg + // If the parent is an OS package and the child is not, exclude the child + // for distros that have a comprehensive feed. That is, distros that list + // vulnerabilities that aren't fixed. Otherwise, the child package might + // be needed for matching. + if comprehensiveDistroFeed && isOSPackage(parent) && !isOSPackage(p) { + return true + } + + // filter out binary packages, even for non-comprehensive distros if p.Type != pkg.BinaryPkg { return false } @@ -143,6 +152,52 @@ func excludePackage(p pkg.Package, parent pkg.Package) bool { return true } +// distroFeedIsComprehensive returns true if the distro feed +// is comprehensive enough that we can drop packages owned by distro packages +// before matching. +func distroFeedIsComprehensive(distro *linux.Release) bool { + // TODO: this mechanism should be re-examined once https://github.com/anchore/grype/issues/1426 + // is addressed + if distro == nil { + return false + } + if distro.ID == "amzn" { + // AmazonLinux shows "like rhel" but is not an rhel clone + // and does not have an exhaustive vulnerability feed. + return false + } + for _, d := range comprehensiveDistros { + if strings.EqualFold(d, distro.ID) { + return true + } + for _, n := range distro.IDLike { + if strings.EqualFold(d, n) { + return true + } + } + } + return false +} + +// computed by: +// sqlite3 vulnerability.db 'select distinct namespace from vulnerability where fix_state in ("wont-fix", "not-fixed") order by namespace;' | cut -d ':' -f 1 | sort | uniq +// then removing 'github' and replacing 'redhat' with 'rhel' +var comprehensiveDistros = []string{ + "debian", + "mariner", + "rhel", + "ubuntu", +} + +func isOSPackage(p pkg.Package) bool { + switch p.Type { + case pkg.DebPkg, pkg.RpmPkg, pkg.PortagePkg, pkg.AlpmPkg, pkg.ApkPkg: + return true + default: + return false + } +} + func dataFromPkg(p pkg.Package) (MetadataType, interface{}, []UpstreamPackage) { var metadata interface{} var upstreams []UpstreamPackage diff --git a/grype/pkg/package_test.go b/grype/pkg/package_test.go index 3eddc785..d294ce7b 100644 --- a/grype/pkg/package_test.go +++ b/grype/pkg/package_test.go @@ -13,7 +13,9 @@ import ( "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" syftFile "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" syftPkg "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" ) func TestNew(t *testing.T) { @@ -656,10 +658,10 @@ func intRef(i int) *int { return &i } -func Test_RemoveBinaryPackagesByOverlap(t *testing.T) { +func Test_RemovePackagesByOverlap(t *testing.T) { tests := []struct { name string - sbom catalogRelationships + sbom *sbom.SBOM expectedPackages []string }{ { @@ -704,10 +706,38 @@ func Test_RemoveBinaryPackagesByOverlap(t *testing.T) { []string{"rpm:node@19.2-r1 -> apk:node@19.2"}), expectedPackages: []string{"apk:node@19.2", "rpm:node@19.2-r1"}, }, + { + name: "does not exclude if OS package owns OS package", + sbom: catalogWithOverlaps( + []string{"rpm:perl@5.3-r1", "rpm:libperl@5.3"}, + []string{"rpm:perl@5.3-r1 -> rpm:libperl@5.3"}), + expectedPackages: []string{"rpm:libperl@5.3", "rpm:perl@5.3-r1"}, + }, + { + name: "does not exclude if owning package is non-OS", + sbom: catalogWithOverlaps( + []string{"python:urllib3@1.2.3", "python:otherlib@1.2.3"}, + []string{"python:urllib3@1.2.3 -> python:otherlib@1.2.3"}), + expectedPackages: []string{"python:otherlib@1.2.3", "python:urllib3@1.2.3"}, + }, + { + name: "python bindings for system RPM install", + sbom: withDistro(catalogWithOverlaps( + []string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"}, + []string{"rpm:python3-rpm@4.14.3-26.el8 -> python:rpm@4.14.3"}), "rhel"), + expectedPackages: []string{"rpm:python3-rpm@4.14.3-26.el8"}, + }, + { + name: "amzn linux doesn't remove packages in this way", + sbom: withDistro(catalogWithOverlaps( + []string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"}, + []string{"rpm:python3-rpm@4.14.3-26.el8 -> python:rpm@4.14.3"}), "amzn"), + expectedPackages: []string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"}, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - catalog := removePackagesByOverlap(test.sbom.collection, test.sbom.relationships) + catalog := removePackagesByOverlap(test.sbom.Artifacts.Packages, test.sbom.Relationships, test.sbom.Artifacts.LinuxDistribution) pkgs := FromCollection(catalog, SynthesisConfig{}) var pkgNames []string for _, p := range pkgs { @@ -718,12 +748,7 @@ func Test_RemoveBinaryPackagesByOverlap(t *testing.T) { } } -type catalogRelationships struct { - collection *syftPkg.Collection - relationships []artifact.Relationship -} - -func catalogWithOverlaps(packages []string, overlaps []string) catalogRelationships { +func catalogWithOverlaps(packages []string, overlaps []string) *sbom.SBOM { var pkgs []syftPkg.Package var relationships []artifact.Relationship @@ -772,8 +797,17 @@ func catalogWithOverlaps(packages []string, overlaps []string) catalogRelationsh catalog := syftPkg.NewCollection(pkgs...) - return catalogRelationships{ - collection: catalog, - relationships: relationships, + return &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: catalog, + }, + Relationships: relationships, } } + +func withDistro(s *sbom.SBOM, id string) *sbom.SBOM { + s.Artifacts.LinuxDistribution = &linux.Release{ + ID: id, + } + return s +} diff --git a/grype/pkg/syft_provider.go b/grype/pkg/syft_provider.go index 6aa7d96c..0f1b725b 100644 --- a/grype/pkg/syft_provider.go +++ b/grype/pkg/syft_provider.go @@ -27,7 +27,7 @@ func syftProvider(userInput string, config ProviderConfig) ([]Package, Context, return nil, Context{}, nil, err } - catalog = removePackagesByOverlap(catalog, relationships) + catalog = removePackagesByOverlap(catalog, relationships, theDistro) srcDescription := src.Describe() diff --git a/grype/pkg/syft_sbom_provider.go b/grype/pkg/syft_sbom_provider.go index e316925c..0157a4d7 100644 --- a/grype/pkg/syft_sbom_provider.go +++ b/grype/pkg/syft_sbom_provider.go @@ -30,8 +30,7 @@ func syftSBOMProvider(userInput string, config ProviderConfig) ([]Package, Conte return nil, Context{}, nil, err } - catalog := s.Artifacts.Packages - catalog = removePackagesByOverlap(catalog, s.Relationships) + catalog := removePackagesByOverlap(s.Artifacts.Packages, s.Relationships, s.Artifacts.LinuxDistribution) return FromCollection(catalog, config.SynthesisConfig), Context{ Source: &s.Source, diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index c86ae5c6..4f5a4098 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -87,14 +87,15 @@ func getSyftSBOM(t testing.TB, image string, format sbom.Format) string { config := cataloger.DefaultConfig() config.Search.Scope = source.SquashedScope // TODO: relationships are not verified at this time - collection, _, distro, err := syft.CatalogPackages(src, config) + collection, relationships, distro, err := syft.CatalogPackages(src, config) s := sbom.SBOM{ Artifacts: sbom.Artifacts{ Packages: collection, LinuxDistribution: distro, }, - Source: src.Describe(), + Relationships: relationships, + Source: src.Describe(), } bytes, err := syft.Encode(s, format)