feat: filter out packages owned by OS packages (#1387)

For example, if the RPM "python3-rpm" is installed, it brings a Python
package called "rpm" with it, which is just the Python bindings to RPM.
This Python package is part of "python3-rpm" and should not be matched
against directly. This deduplication strategy is only applied on distros
whose vulnerability feed is comprehensive enough that we don't expect
false negatives from dropping the owned package.

Signed-off-by: Will Murphy <will.murphy@anchore.com>
This commit is contained in:
William Murphy 2023-08-18 15:43:42 -04:00 committed by GitHub
parent 9e119c87a4
commit 7ff37a0310
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 111 additions and 22 deletions

View file

@ -10,6 +10,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/pkg"
cpes "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)
@ -101,7 +102,7 @@ func (p Package) String() string {
return fmt.Sprintf("Pkg(type=%s, name=%s, version=%s, upstreams=%d)", p.Type, p.Name, p.Version, len(p.Upstreams))
}
func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.Relationship) *pkg.Collection {
func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.Relationship, distro *linux.Release) *pkg.Collection {
byOverlap := map[artifact.ID]artifact.Relationship{}
for _, r := range relationships {
if r.Type == artifact.OwnershipByFileOverlapRelationship {
@ -110,12 +111,12 @@ func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.R
}
out := pkg.NewCollection()
comprehensiveDistroFeed := distroFeedIsComprehensive(distro)
for p := range catalog.Enumerate() {
r, ok := byOverlap[p.ID()]
if ok {
from, ok := r.From.(pkg.Package)
if ok && excludePackage(p, from) {
if ok && excludePackage(comprehensiveDistroFeed, p, from) {
continue
}
}
@ -125,7 +126,7 @@ func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.R
return out
}
func excludePackage(p pkg.Package, parent pkg.Package) bool {
func excludePackage(comprehensiveDistroFeed bool, p pkg.Package, parent pkg.Package) bool {
// NOTE: we are not checking the name because we have mismatches like:
// python 3.9.2 binary
// python3.9 3.9.2-1 deb
@ -135,7 +136,15 @@ func excludePackage(p pkg.Package, parent pkg.Package) bool {
return false
}
// filter out only binary pkg
// If the parent is an OS package and the child is not, exclude the child
// for distros that have a comprehensive feed. That is, distros that list
// vulnerabilities that aren't fixed. Otherwise, the child package might
// be needed for matching.
if comprehensiveDistroFeed && isOSPackage(parent) && !isOSPackage(p) {
return true
}
// filter out binary packages, even for non-comprehensive distros
if p.Type != pkg.BinaryPkg {
return false
}
@ -143,6 +152,52 @@ func excludePackage(p pkg.Package, parent pkg.Package) bool {
return true
}
// distroFeedIsComprehensive reports whether the vulnerability feed for the
// given distro is comprehensive enough that packages owned by distro packages
// can be dropped before matching.
//
// It returns false for a nil distro and for Amazon Linux, and true when the
// distro's ID or any of its IDLike entries matches (case-insensitively) an
// entry in comprehensiveDistros. Otherwise it returns false.
func distroFeedIsComprehensive(distro *linux.Release) bool {
	// TODO: this mechanism should be re-examined once https://github.com/anchore/grype/issues/1426
	// is addressed
	if distro == nil {
		return false
	}
	// AmazonLinux shows "like rhel" but is not an rhel clone and does not
	// have an exhaustive vulnerability feed. The comparison is
	// case-insensitive, like every other ID comparison below, so that a
	// differently-cased ID cannot fall through to the IDLike match.
	if strings.EqualFold(distro.ID, "amzn") {
		return false
	}
	for _, d := range comprehensiveDistros {
		if strings.EqualFold(d, distro.ID) {
			return true
		}
		for _, n := range distro.IDLike {
			if strings.EqualFold(d, n) {
				return true
			}
		}
	}
	return false
}
// comprehensiveDistros lists the distro IDs whose vulnerability feed contains
// entries with a "wont-fix" or "not-fixed" fix state. It was computed by:
//
//	sqlite3 vulnerability.db 'select distinct namespace from vulnerability where fix_state in ("wont-fix", "not-fixed") order by namespace;' | cut -d ':' -f 1 | sort | uniq
//
// then removing 'github' and replacing 'redhat' with 'rhel'.
var comprehensiveDistros = []string{"debian", "mariner", "rhel", "ubuntu"}
// isOSPackage reports whether p's type is one of pkg.DebPkg, pkg.RpmPkg,
// pkg.PortagePkg, pkg.AlpmPkg, or pkg.ApkPkg.
func isOSPackage(p pkg.Package) bool {
	t := p.Type
	return t == pkg.DebPkg ||
		t == pkg.RpmPkg ||
		t == pkg.PortagePkg ||
		t == pkg.AlpmPkg ||
		t == pkg.ApkPkg
}
func dataFromPkg(p pkg.Package) (MetadataType, interface{}, []UpstreamPackage) {
var metadata interface{}
var upstreams []UpstreamPackage

View file

@ -13,7 +13,9 @@ import (
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
syftFile "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"
syftPkg "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
func TestNew(t *testing.T) {
@ -656,10 +658,10 @@ func intRef(i int) *int {
return &i
}
func Test_RemoveBinaryPackagesByOverlap(t *testing.T) {
func Test_RemovePackagesByOverlap(t *testing.T) {
tests := []struct {
name string
sbom catalogRelationships
sbom *sbom.SBOM
expectedPackages []string
}{
{
@ -704,10 +706,38 @@ func Test_RemoveBinaryPackagesByOverlap(t *testing.T) {
[]string{"rpm:node@19.2-r1 -> apk:node@19.2"}),
expectedPackages: []string{"apk:node@19.2", "rpm:node@19.2-r1"},
},
{
name: "does not exclude if OS package owns OS package",
sbom: catalogWithOverlaps(
[]string{"rpm:perl@5.3-r1", "rpm:libperl@5.3"},
[]string{"rpm:perl@5.3-r1 -> rpm:libperl@5.3"}),
expectedPackages: []string{"rpm:libperl@5.3", "rpm:perl@5.3-r1"},
},
{
name: "does not exclude if owning package is non-OS",
sbom: catalogWithOverlaps(
[]string{"python:urllib3@1.2.3", "python:otherlib@1.2.3"},
[]string{"python:urllib3@1.2.3 -> python:otherlib@1.2.3"}),
expectedPackages: []string{"python:otherlib@1.2.3", "python:urllib3@1.2.3"},
},
{
name: "python bindings for system RPM install",
sbom: withDistro(catalogWithOverlaps(
[]string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"},
[]string{"rpm:python3-rpm@4.14.3-26.el8 -> python:rpm@4.14.3"}), "rhel"),
expectedPackages: []string{"rpm:python3-rpm@4.14.3-26.el8"},
},
{
name: "amzn linux doesn't remove packages in this way",
sbom: withDistro(catalogWithOverlaps(
[]string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"},
[]string{"rpm:python3-rpm@4.14.3-26.el8 -> python:rpm@4.14.3"}), "amzn"),
expectedPackages: []string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
catalog := removePackagesByOverlap(test.sbom.collection, test.sbom.relationships)
catalog := removePackagesByOverlap(test.sbom.Artifacts.Packages, test.sbom.Relationships, test.sbom.Artifacts.LinuxDistribution)
pkgs := FromCollection(catalog, SynthesisConfig{})
var pkgNames []string
for _, p := range pkgs {
@ -718,12 +748,7 @@ func Test_RemoveBinaryPackagesByOverlap(t *testing.T) {
}
}
type catalogRelationships struct {
collection *syftPkg.Collection
relationships []artifact.Relationship
}
func catalogWithOverlaps(packages []string, overlaps []string) catalogRelationships {
func catalogWithOverlaps(packages []string, overlaps []string) *sbom.SBOM {
var pkgs []syftPkg.Package
var relationships []artifact.Relationship
@ -772,8 +797,17 @@ func catalogWithOverlaps(packages []string, overlaps []string) catalogRelationsh
catalog := syftPkg.NewCollection(pkgs...)
return catalogRelationships{
collection: catalog,
relationships: relationships,
return &sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: catalog,
},
Relationships: relationships,
}
}
// withDistro sets the SBOM's Linux distribution to a release that carries only
// the given ID, and returns the same SBOM pointer so the call can wrap another
// SBOM-producing expression.
func withDistro(s *sbom.SBOM, id string) *sbom.SBOM {
	release := linux.Release{ID: id}
	s.Artifacts.LinuxDistribution = &release
	return s
}

View file

@ -27,7 +27,7 @@ func syftProvider(userInput string, config ProviderConfig) ([]Package, Context,
return nil, Context{}, nil, err
}
catalog = removePackagesByOverlap(catalog, relationships)
catalog = removePackagesByOverlap(catalog, relationships, theDistro)
srcDescription := src.Describe()

View file

@ -30,8 +30,7 @@ func syftSBOMProvider(userInput string, config ProviderConfig) ([]Package, Conte
return nil, Context{}, nil, err
}
catalog := s.Artifacts.Packages
catalog = removePackagesByOverlap(catalog, s.Relationships)
catalog := removePackagesByOverlap(s.Artifacts.Packages, s.Relationships, s.Artifacts.LinuxDistribution)
return FromCollection(catalog, config.SynthesisConfig), Context{
Source: &s.Source,

View file

@ -87,14 +87,15 @@ func getSyftSBOM(t testing.TB, image string, format sbom.Format) string {
config := cataloger.DefaultConfig()
config.Search.Scope = source.SquashedScope
// TODO: relationships are not verified at this time
collection, _, distro, err := syft.CatalogPackages(src, config)
collection, relationships, distro, err := syft.CatalogPackages(src, config)
s := sbom.SBOM{
Artifacts: sbom.Artifacts{
Packages: collection,
LinuxDistribution: distro,
},
Source: src.Describe(),
Relationships: relationships,
Source: src.Describe(),
}
bytes, err := syft.Encode(s, format)