mirror of
https://github.com/anchore/grype
synced 2024-11-10 14:44:12 +00:00
feat: filter out packages owned by OS packages (#1387)
For example, if the rpm "python3-rpm" is installed, it brings a python package called "rpm" with it, which is just python bindings to RPM. But this python package is part of "python3-rpm", and should not be matched against directly. Only apply this deduplication strategy on distros with a comprehensive enough vulnerability feed that we don't expect false negatives from it. Signed-off-by: Will Murphy <will.murphy@anchore.com>
This commit is contained in:
parent
9e119c87a4
commit
7ff37a0310
5 changed files with 111 additions and 22 deletions
|
@ -10,6 +10,7 @@ import (
|
|||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/cpe"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/linux"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
cpes "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
|
||||
)
|
||||
|
@ -101,7 +102,7 @@ func (p Package) String() string {
|
|||
return fmt.Sprintf("Pkg(type=%s, name=%s, version=%s, upstreams=%d)", p.Type, p.Name, p.Version, len(p.Upstreams))
|
||||
}
|
||||
|
||||
func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.Relationship) *pkg.Collection {
|
||||
func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.Relationship, distro *linux.Release) *pkg.Collection {
|
||||
byOverlap := map[artifact.ID]artifact.Relationship{}
|
||||
for _, r := range relationships {
|
||||
if r.Type == artifact.OwnershipByFileOverlapRelationship {
|
||||
|
@ -110,12 +111,12 @@ func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.R
|
|||
}
|
||||
|
||||
out := pkg.NewCollection()
|
||||
|
||||
comprehensiveDistroFeed := distroFeedIsComprehensive(distro)
|
||||
for p := range catalog.Enumerate() {
|
||||
r, ok := byOverlap[p.ID()]
|
||||
if ok {
|
||||
from, ok := r.From.(pkg.Package)
|
||||
if ok && excludePackage(p, from) {
|
||||
if ok && excludePackage(comprehensiveDistroFeed, p, from) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
@ -125,7 +126,7 @@ func removePackagesByOverlap(catalog *pkg.Collection, relationships []artifact.R
|
|||
return out
|
||||
}
|
||||
|
||||
func excludePackage(p pkg.Package, parent pkg.Package) bool {
|
||||
func excludePackage(comprehensiveDistroFeed bool, p pkg.Package, parent pkg.Package) bool {
|
||||
// NOTE: we are not checking the name because we have mismatches like:
|
||||
// python 3.9.2 binary
|
||||
// python3.9 3.9.2-1 deb
|
||||
|
@ -135,7 +136,15 @@ func excludePackage(p pkg.Package, parent pkg.Package) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// filter out only binary pkg
|
||||
// If the parent is an OS package and the child is not, exclude the child
|
||||
// for distros that have a comprehensive feed. That is, distros that list
|
||||
// vulnerabilities that aren't fixed. Otherwise, the child package might
|
||||
// be needed for matching.
|
||||
if comprehensiveDistroFeed && isOSPackage(parent) && !isOSPackage(p) {
|
||||
return true
|
||||
}
|
||||
|
||||
// filter out binary packages, even for non-comprehensive distros
|
||||
if p.Type != pkg.BinaryPkg {
|
||||
return false
|
||||
}
|
||||
|
@ -143,6 +152,52 @@ func excludePackage(p pkg.Package, parent pkg.Package) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// distroFeedIsComprehensive returns true if the distro feed
|
||||
// is comprehensive enough that we can drop packages owned by distro packages
|
||||
// before matching.
|
||||
func distroFeedIsComprehensive(distro *linux.Release) bool {
|
||||
// TODO: this mechanism should be re-examined once https://github.com/anchore/grype/issues/1426
|
||||
// is addressed
|
||||
if distro == nil {
|
||||
return false
|
||||
}
|
||||
if distro.ID == "amzn" {
|
||||
// AmazonLinux shows "like rhel" but is not an rhel clone
|
||||
// and does not have an exhaustive vulnerability feed.
|
||||
return false
|
||||
}
|
||||
for _, d := range comprehensiveDistros {
|
||||
if strings.EqualFold(d, distro.ID) {
|
||||
return true
|
||||
}
|
||||
for _, n := range distro.IDLike {
|
||||
if strings.EqualFold(d, n) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// comprehensiveDistros holds the distro IDs that distroFeedIsComprehensive
// compares a release's ID and IDLike entries against.
//
// computed by:
// sqlite3 vulnerability.db 'select distinct namespace from vulnerability where fix_state in ("wont-fix", "not-fixed") order by namespace;' | cut -d ':' -f 1 | sort | uniq
// then removing 'github' and replacing 'redhat' with 'rhel'
var comprehensiveDistros = []string{"debian", "mariner", "rhel", "ubuntu"}
|
||||
|
||||
func isOSPackage(p pkg.Package) bool {
|
||||
switch p.Type {
|
||||
case pkg.DebPkg, pkg.RpmPkg, pkg.PortagePkg, pkg.AlpmPkg, pkg.ApkPkg:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func dataFromPkg(p pkg.Package) (MetadataType, interface{}, []UpstreamPackage) {
|
||||
var metadata interface{}
|
||||
var upstreams []UpstreamPackage
|
||||
|
|
|
@ -13,7 +13,9 @@ import (
|
|||
"github.com/anchore/syft/syft/cpe"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
syftFile "github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/linux"
|
||||
syftPkg "github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/sbom"
|
||||
)
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
|
@ -656,10 +658,10 @@ func intRef(i int) *int {
|
|||
return &i
|
||||
}
|
||||
|
||||
func Test_RemoveBinaryPackagesByOverlap(t *testing.T) {
|
||||
func Test_RemovePackagesByOverlap(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
sbom catalogRelationships
|
||||
sbom *sbom.SBOM
|
||||
expectedPackages []string
|
||||
}{
|
||||
{
|
||||
|
@ -704,10 +706,38 @@ func Test_RemoveBinaryPackagesByOverlap(t *testing.T) {
|
|||
[]string{"rpm:node@19.2-r1 -> apk:node@19.2"}),
|
||||
expectedPackages: []string{"apk:node@19.2", "rpm:node@19.2-r1"},
|
||||
},
|
||||
{
|
||||
name: "does not exclude if OS package owns OS package",
|
||||
sbom: catalogWithOverlaps(
|
||||
[]string{"rpm:perl@5.3-r1", "rpm:libperl@5.3"},
|
||||
[]string{"rpm:perl@5.3-r1 -> rpm:libperl@5.3"}),
|
||||
expectedPackages: []string{"rpm:libperl@5.3", "rpm:perl@5.3-r1"},
|
||||
},
|
||||
{
|
||||
name: "does not exclude if owning package is non-OS",
|
||||
sbom: catalogWithOverlaps(
|
||||
[]string{"python:urllib3@1.2.3", "python:otherlib@1.2.3"},
|
||||
[]string{"python:urllib3@1.2.3 -> python:otherlib@1.2.3"}),
|
||||
expectedPackages: []string{"python:otherlib@1.2.3", "python:urllib3@1.2.3"},
|
||||
},
|
||||
{
|
||||
name: "python bindings for system RPM install",
|
||||
sbom: withDistro(catalogWithOverlaps(
|
||||
[]string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"},
|
||||
[]string{"rpm:python3-rpm@4.14.3-26.el8 -> python:rpm@4.14.3"}), "rhel"),
|
||||
expectedPackages: []string{"rpm:python3-rpm@4.14.3-26.el8"},
|
||||
},
|
||||
{
|
||||
name: "amzn linux doesn't remove packages in this way",
|
||||
sbom: withDistro(catalogWithOverlaps(
|
||||
[]string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"},
|
||||
[]string{"rpm:python3-rpm@4.14.3-26.el8 -> python:rpm@4.14.3"}), "amzn"),
|
||||
expectedPackages: []string{"rpm:python3-rpm@4.14.3-26.el8", "python:rpm@4.14.3"},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
catalog := removePackagesByOverlap(test.sbom.collection, test.sbom.relationships)
|
||||
catalog := removePackagesByOverlap(test.sbom.Artifacts.Packages, test.sbom.Relationships, test.sbom.Artifacts.LinuxDistribution)
|
||||
pkgs := FromCollection(catalog, SynthesisConfig{})
|
||||
var pkgNames []string
|
||||
for _, p := range pkgs {
|
||||
|
@ -718,12 +748,7 @@ func Test_RemoveBinaryPackagesByOverlap(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
type catalogRelationships struct {
|
||||
collection *syftPkg.Collection
|
||||
relationships []artifact.Relationship
|
||||
}
|
||||
|
||||
func catalogWithOverlaps(packages []string, overlaps []string) catalogRelationships {
|
||||
func catalogWithOverlaps(packages []string, overlaps []string) *sbom.SBOM {
|
||||
var pkgs []syftPkg.Package
|
||||
var relationships []artifact.Relationship
|
||||
|
||||
|
@ -772,8 +797,17 @@ func catalogWithOverlaps(packages []string, overlaps []string) catalogRelationsh
|
|||
|
||||
catalog := syftPkg.NewCollection(pkgs...)
|
||||
|
||||
return catalogRelationships{
|
||||
collection: catalog,
|
||||
relationships: relationships,
|
||||
return &sbom.SBOM{
|
||||
Artifacts: sbom.Artifacts{
|
||||
Packages: catalog,
|
||||
},
|
||||
Relationships: relationships,
|
||||
}
|
||||
}
|
||||
|
||||
func withDistro(s *sbom.SBOM, id string) *sbom.SBOM {
|
||||
s.Artifacts.LinuxDistribution = &linux.Release{
|
||||
ID: id,
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ func syftProvider(userInput string, config ProviderConfig) ([]Package, Context,
|
|||
return nil, Context{}, nil, err
|
||||
}
|
||||
|
||||
catalog = removePackagesByOverlap(catalog, relationships)
|
||||
catalog = removePackagesByOverlap(catalog, relationships, theDistro)
|
||||
|
||||
srcDescription := src.Describe()
|
||||
|
||||
|
|
|
@ -30,8 +30,7 @@ func syftSBOMProvider(userInput string, config ProviderConfig) ([]Package, Conte
|
|||
return nil, Context{}, nil, err
|
||||
}
|
||||
|
||||
catalog := s.Artifacts.Packages
|
||||
catalog = removePackagesByOverlap(catalog, s.Relationships)
|
||||
catalog := removePackagesByOverlap(s.Artifacts.Packages, s.Relationships, s.Artifacts.LinuxDistribution)
|
||||
|
||||
return FromCollection(catalog, config.SynthesisConfig), Context{
|
||||
Source: &s.Source,
|
||||
|
|
|
@ -87,14 +87,15 @@ func getSyftSBOM(t testing.TB, image string, format sbom.Format) string {
|
|||
config := cataloger.DefaultConfig()
|
||||
config.Search.Scope = source.SquashedScope
|
||||
// TODO: relationships are not verified at this time
|
||||
collection, _, distro, err := syft.CatalogPackages(src, config)
|
||||
collection, relationships, distro, err := syft.CatalogPackages(src, config)
|
||||
|
||||
s := sbom.SBOM{
|
||||
Artifacts: sbom.Artifacts{
|
||||
Packages: collection,
|
||||
LinuxDistribution: distro,
|
||||
},
|
||||
Source: src.Describe(),
|
||||
Relationships: relationships,
|
||||
Source: src.Describe(),
|
||||
}
|
||||
|
||||
bytes, err := syft.Encode(s, format)
|
||||
|
|
Loading…
Reference in a new issue