feat: Added functionality to convert major, minor, patch to version for binary classifier (#2864)

---------
Signed-off-by: Laurent Goderre <laurent.goderre@docker.com>
This commit is contained in:
Laurent Goderre 2024-05-23 12:17:12 -04:00 committed by GitHub
parent 3875e4a67d
commit 7071f1e498
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 114 additions and 30 deletions

View file

@ -9,6 +9,7 @@ import (
"fmt"
"io"
"regexp"
"strconv"
"strings"
"text/template"
@ -70,12 +71,18 @@ func (cfg Classifier) MarshalJSON() ([]byte, error) {
}
// EvidenceMatcher is a function called to catalog Packages that match some sort of evidence
type EvidenceMatcher func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error)
type EvidenceMatcher func(classifier Classifier, context matcherContext) ([]pkg.Package, error)
type matcherContext struct {
resolver file.Resolver
location file.Location
getContents func(resolver matcherContext) ([]byte, error)
}
func evidenceMatchers(matchers ...EvidenceMatcher) EvidenceMatcher {
return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
return func(classifier Classifier, context matcherContext) ([]pkg.Package, error) {
for _, matcher := range matchers {
match, err := matcher(resolver, classifier, location)
match, err := matcher(classifier, context)
if err != nil {
return nil, err
}
@ -89,12 +96,12 @@ func evidenceMatchers(matchers ...EvidenceMatcher) EvidenceMatcher {
func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) EvidenceMatcher {
pat := regexp.MustCompile(fileNamePattern)
return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
if !pat.MatchString(location.RealPath) {
return func(classifier Classifier, context matcherContext) ([]pkg.Package, error) {
if !pat.MatchString(context.location.RealPath) {
return nil, nil
}
filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath)
filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, context.location.RealPath)
// versions like 3.5 should not match any character, but explicit dot
for k, v := range filepathNamedGroupValues {
@ -117,14 +124,14 @@ func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate stri
return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
}
contents, err := getContents(resolver, location)
contents, err := getContents(context)
if err != nil {
return nil, fmt.Errorf("unable to get read contents for file: %w", err)
}
matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
p := newClassifierPackage(classifier, location, matchMetadata)
p := newClassifierPackage(classifier, context.location, matchMetadata)
if p == nil {
return nil, nil
}
@ -135,15 +142,31 @@ func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate stri
func FileContentsVersionMatcher(pattern string) EvidenceMatcher {
pat := regexp.MustCompile(pattern)
return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
contents, err := getContents(resolver, location)
return func(classifier Classifier, context matcherContext) ([]pkg.Package, error) {
contents, err := getContents(context)
if err != nil {
return nil, fmt.Errorf("unable to get read contents for file: %w", err)
}
matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents))
p := newClassifierPackage(classifier, location, matchMetadata)
// Convert {major: 1, minor: 2, patch: 3} to "1.2.3"
_, versionOk := matchMetadata["version"]
majorStr, majorOk := matchMetadata["major"]
minorStr, minorOk := matchMetadata["minor"]
patchStr, patchOk := matchMetadata["patch"]
if !versionOk && majorOk && minorOk && patchOk {
major, majorErr := strconv.Atoi(majorStr)
minor, minorErr := strconv.Atoi(minorStr)
patch, patchErr := strconv.Atoi(patchStr)
if majorErr == nil && minorErr == nil && patchErr == nil {
matchMetadata["version"] = fmt.Sprintf("%d.%d.%d", major, minor, patch)
}
}
p := newClassifierPackage(classifier, context.location, matchMetadata)
if p == nil {
return nil, nil
}
@ -159,8 +182,8 @@ func matchExcluding(matcher EvidenceMatcher, contentPatternsToExclude ...string)
for _, p := range contentPatternsToExclude {
nonMatchPatterns = append(nonMatchPatterns, regexp.MustCompile(p))
}
return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
contents, err := getContents(resolver, location)
return func(classifier Classifier, context matcherContext) ([]pkg.Package, error) {
contents, err := getContents(context)
if err != nil {
return nil, fmt.Errorf("unable to get read contents for file: %w", err)
}
@ -169,15 +192,15 @@ func matchExcluding(matcher EvidenceMatcher, contentPatternsToExclude ...string)
return nil, nil
}
}
return matcher(resolver, classifier, location)
return matcher(classifier, context)
}
}
//nolint:gocognit
func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher EvidenceMatcher) EvidenceMatcher {
pat := regexp.MustCompile(sharedLibraryPattern)
return func(resolver file.Resolver, classifier Classifier, location file.Location) (packages []pkg.Package, _ error) {
libs, err := sharedLibraries(resolver, location)
return func(classifier Classifier, context matcherContext) (packages []pkg.Package, _ error) {
libs, err := sharedLibraries(context)
if err != nil {
return nil, err
}
@ -186,18 +209,24 @@ func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher Evide
continue
}
locations, err := resolver.FilesByGlob("**/" + lib)
locations, err := context.resolver.FilesByGlob("**/" + lib)
if err != nil {
return nil, err
}
for _, libraryLocation := range locations {
pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation)
newResolver := matcherContext{
resolver: context.resolver,
location: libraryLocation,
getContents: context.getContents,
}
newResolver.location = libraryLocation
pkgs, err := sharedLibraryMatcher(classifier, newResolver)
if err != nil {
return nil, err
}
for _, p := range pkgs {
// set the source binary as the first location
locationSet := file.NewLocationSet(location)
locationSet := file.NewLocationSet(context.location)
locationSet.Add(p.Locations.ToSlice()...)
p.Locations = locationSet
meta, _ := p.Metadata.(pkg.BinarySignature)
@ -205,7 +234,7 @@ func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher Evide
Matches: append([]pkg.ClassifierMatch{
{
Classifier: classifier.Class,
Location: location,
Location: context.location,
},
}, meta.Matches...),
}
@ -225,12 +254,15 @@ func mustPURL(purl string) packageurl.PackageURL {
return p
}
func getContents(resolver file.Resolver, location file.Location) ([]byte, error) {
reader, err := resolver.FileContentsByLocation(location)
func getContents(context matcherContext) ([]byte, error) {
if context.getContents != nil {
return context.getContents(context)
}
reader, err := context.resolver.FileContentsByLocation(context.location)
if err != nil {
return nil, err
}
defer internal.CloseAndLogError(reader, location.AccessPath)
defer internal.CloseAndLogError(reader, context.location.AccessPath)
// TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
contents, err := io.ReadAll(reader)
@ -251,8 +283,8 @@ func singleCPE(cpeString string) []cpe.CPE {
// sharedLibraries returns a list of all shared libraries found within a binary, currently
// supporting: elf, macho, and windows pe
func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) {
contents, err := getContents(resolver, location)
func sharedLibraries(context matcherContext) ([]string, error) {
contents, err := getContents(context)
if err != nil {
return nil, err
}
@ -263,7 +295,7 @@ func sharedLibraries(resolver file.Resolver, location file.Location) ([]string,
if e != nil {
symbols, err := e.ImportedLibraries()
if err != nil {
log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err)
log.Debugf("unable to read elf binary at: %s -- %s", context.location.RealPath, err)
}
return symbols, nil
}
@ -272,7 +304,7 @@ func sharedLibraries(resolver file.Resolver, location file.Location) ([]string,
if m != nil {
symbols, err := m.ImportedLibraries()
if err != nil {
log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err)
log.Debugf("unable to read macho binary at: %s -- %s", context.location.RealPath, err)
}
return symbols, nil
}
@ -281,7 +313,7 @@ func sharedLibraries(resolver file.Resolver, location file.Location) ([]string,
if p != nil {
symbols, err := p.ImportedLibraries()
if err != nil {
log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err)
log.Debugf("unable to read pe binary at: %s -- %s", context.location.RealPath, err)
}
return symbols, nil
}

View file

@ -103,7 +103,7 @@ func catalog(resolver file.Resolver, cls Classifier) (packages []pkg.Package, er
return nil, err
}
for _, location := range locations {
pkgs, err := cls.EvidenceMatcher(resolver, cls, location)
pkgs, err := cls.EvidenceMatcher(cls, matcherContext{resolver: resolver, location: location})
if err != nil {
return nil, err
}

View file

@ -61,6 +61,17 @@ func Test_ClassifierCPEs(t *testing.T) {
"cpe:2.3:a:some:apps:1.8:*:*:*:*:*:*:*",
},
},
{
name: "version in parts",
fixture: "test-fixtures/version-parts.txt",
classifier: Classifier{
Package: "some-app",
FileGlob: "**/version-parts.txt",
EvidenceMatcher: FileContentsVersionMatcher(`(?m)\x00(?P<major>[0-9.]+)\x00(?P<minor>[0-9.]+)\x00(?P<patch>[0-9.]+)\x00`),
CPEs: []cpe.CPE{},
},
cpes: nil,
},
}
for _, test := range tests {
@ -70,7 +81,7 @@ func Test_ClassifierCPEs(t *testing.T) {
require.NoError(t, err)
require.Len(t, ls, 1)
pkgs, err := test.classifier.EvidenceMatcher(resolver, test.classifier, ls[0])
pkgs, err := test.classifier.EvidenceMatcher(test.classifier, matcherContext{resolver: resolver, location: ls[0]})
require.NoError(t, err)
require.Len(t, pkgs, 1)
@ -128,3 +139,44 @@ func TestClassifier_MarshalJSON(t *testing.T) {
})
}
}
func TestFileContentsVersionMatcher(t *testing.T) {
tests := []struct {
name string
pattern string
data string
expected string
}{
{
name: "simple version string regexp",
pattern: `some data (?P<version>[0-9]+\.[0-9]+\.[0-9]+) some data`,
data: "some data 1.2.3 some data",
expected: "1.2.3",
},
{
name: "version parts regexp",
pattern: `\x00\x23(?P<major>[0-9]+)\x00\x23(?P<minor>[0-9]+)\x00\x23(?P<patch>[0-9]+)\x00\x23`,
data: "\x00\x239\x00\x239\x00\x239\x00\x23",
expected: "9.9.9",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
mockGetContent := func(context matcherContext) ([]byte, error) {
return []byte(tt.data), nil
}
fn := FileContentsVersionMatcher(tt.pattern)
p, err := fn(Classifier{}, matcherContext{
getContents: mockGetContent,
})
if err != nil {
t.Errorf("Unexpected error %#v", err)
}
if p[0].Version != tt.expected {
t.Errorf("Versions don't match.\ngot\n%q\n\nexpected\n%q", p[0].Version, tt.expected)
}
})
}
}