feat: add nodejs-binary package classifier (#1296)

This commit is contained in:
Christopher Angelo Phillips 2022-10-31 12:45:11 -04:00 committed by GitHub
parent 919c929798
commit edeba9c01c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 209 additions and 12 deletions

View file

@ -59,6 +59,7 @@ var (
"application/x-elf",
"application/x-sharedlib",
"application/vnd.microsoft.portable-executable",
"application/x-executable",
}...,
)
)

View file

@ -93,7 +93,8 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
location: "[", // note: busybox is a link to [
expected: []Classification{
{
Class: "busybox-binary",
Class: "busybox-binary",
VirtualPath: "busybox",
Metadata: map[string]string{
"version": "3.33.3",
},
@ -148,7 +149,8 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T
location: "/bin/[",
expected: []Classification{
{
Class: "busybox-binary",
Class: "busybox-binary",
VirtualPath: "/bin/busybox",
Metadata: map[string]string{
"version": "1.35.0",
},

View file

@ -40,6 +40,16 @@ var DefaultClassifiers = []Classifier{
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
},
},
{
Class: "nodejs-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)node$`),
},
EvidencePatternTemplates: []string{
// regex that matches node.js/vx.y.z
`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
},
},
{
Class: "go-binary-hint",
FilepathPatterns: []*regexp.Regexp{
@ -67,12 +77,13 @@ type Classifier struct {
}
type Classification struct {
Class string `json:"class"`
Metadata map[string]string `json:"metadata"`
Class string `json:"class"`
VirtualPath string `json:"virtual_path"`
Metadata map[string]string `json:"metadata"`
}
func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) {
doesFilepathMatch, filepathNamedGroupValues := filepathMatches(c.FilepathPatterns, location)
doesFilepathMatch, filepathNamedGroupValues := FilepathMatches(c.FilepathPatterns, location)
if !doesFilepathMatch {
return nil, nil
}
@ -114,8 +125,9 @@ func (c Classifier) Classify(resolver source.FileResolver, location source.Locat
matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))
if result == nil {
result = &Classification{
Class: c.Class,
Metadata: matchMetadata,
Class: c.Class,
VirtualPath: location.VirtualPath,
Metadata: matchMetadata,
}
} else {
for key, value := range matchMetadata {
@ -126,7 +138,7 @@ func (c Classifier) Classify(resolver source.FileResolver, location source.Locat
return result, nil
}
func filepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
func FilepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
for _, path := range []string{location.RealPath, location.VirtualPath} {
if path == "" {
continue

View file

@ -89,7 +89,7 @@ func TestFilepathMatches(t *testing.T) {
for _, p := range test.patterns {
patterns = append(patterns, regexp.MustCompile(p))
}
actualMatches, actualNamedGroups := filepathMatches(patterns, test.location)
actualMatches, actualNamedGroups := FilepathMatches(patterns, test.location)
assert.Equal(t, test.expectedMatches, actualMatches)
assert.Equal(t, test.expectedNamedGroups, actualNamedGroups)
})

View file

@ -183,6 +183,14 @@ func Test_SourceInfo(t *testing.T) {
"from cabal or stack manifest files",
},
},
{
input: pkg.Package{
Type: pkg.BinaryPkg,
},
expected: []string{
"acquired package info from the following paths",
},
},
}
var pkgTypes []pkg.Type
for _, test := range tests {

View file

@ -0,0 +1,7 @@
package pkg
// BinaryMetadata describes how and where a package discovered by binary classification was found.
type BinaryMetadata struct {
// Classifier identifies the classifier that produced the package (for example "node.js").
Classifier string
// RealPath is the real path of the location the binary contents were read from.
RealPath string
// VirtualPath is the virtual path of the location the binary contents were read from.
VirtualPath string
}

View file

@ -66,7 +66,8 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..
for _, p := range packages {
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
p.CPEs = cpe.Generate(p)
// we might have binary classified CPE already with the package so we want to append here
p.CPEs = append(p.CPEs, cpe.Generate(p)...)
// generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = pkg.URL(p, release)
@ -85,7 +86,6 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..
} else {
allRelationships = append(allRelationships, owningRelationships...)
}
// add to catalog
catalog.Add(p)
}

View file

@ -39,6 +39,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
php.NewPHPComposerInstalledCataloger(),
javascript.NewJavascriptPackageCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmdbCataloger(),
java.NewJavaCataloger(cfg.Java()),
@ -58,6 +59,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
php.NewPHPComposerLockCataloger(),
javascript.NewJavascriptLockCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmdbCataloger(),
rpm.NewFileCataloger(),
@ -86,6 +88,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
javascript.NewJavascriptLockCataloger(),
javascript.NewJavascriptPackageCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmdbCataloger(),
rpm.NewFileCataloger(),

View file

@ -70,6 +70,11 @@ func candidateVendors(p pkg.Package) []string {
vendors := newFieldCandidateSet(candidateProducts(p)...)
switch p.Language {
case pkg.JavaScript:
// for JavaScript if we find node.js as a package then the vendor is "nodejs"
if p.Name == "node.js" {
vendors.addValue("nodejs")
}
case pkg.Ruby:
vendors.addValue("ruby-lang")
case pkg.Go:

View file

@ -0,0 +1,87 @@
package generic
import (
"fmt"
"io"
"path"
"regexp"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
"github.com/anchore/syft/syft/source"
)
// Classifier is a generic package classifier that can be used to match a package definition
// to a file that meets the given content criteria of the EvidencePatterns.
type Classifier struct {
// Package is the name of the package this classifier identifies. It is reported in error messages and
// recorded as the Classifier value of the resulting package's BinaryMetadata.
Package string
// FilepathPatterns is a list of regular expressions that will be used to match against the file path of a given
// source location. If any of the patterns match, the file will be considered a candidate for parsing.
// If no patterns are provided, the reader is automatically considered a candidate.
FilepathPatterns []*regexp.Regexp
// EvidencePatterns is a list of regular expressions that will be used to match against the file contents of a
// given file in the source location. The first pattern that matches is used to build the package, and the value
// of its "version" named capture group becomes the package version.
EvidencePatterns []*regexp.Regexp
// CPEs are the CPEs attached to every package produced by this classifier.
CPEs []pkg.CPE
}
// Examine decides whether the file behind reader is the package described by c and, if so,
// builds a binary package for it.
//
// The location must match at least one of c.FilepathPatterns (when any are configured);
// otherwise an error is returned. The file contents are then tested against c.EvidencePatterns
// in order, and the first pattern that matches produces the package, with the "version" named
// capture group used as the package version.
//
// Returns:
//   - a non-nil package when the path and an evidence pattern both match;
//   - (nil, nil, nil) when the path matches but no evidence pattern does;
//   - a non-nil error when the path does not match or the contents cannot be read.
//
// The relationship result is always nil.
func (c Classifier) Examine(reader source.LocationReadCloser) (p *pkg.Package, r *artifact.Relationship, err error) {
	if len(c.FilepathPatterns) > 0 {
		if matches, _ := file.FilepathMatches(c.FilepathPatterns, reader.Location); !matches {
			return nil, nil, fmt.Errorf("location: %s did not match any patterns for package=%q", reader.Location, c.Package)
		}
	}

	contents, err := getContents(reader)
	if err != nil {
		return nil, nil, fmt.Errorf("unable to read contents for file: %w", err)
	}

	// path.Base("") is ".", which is not a meaningful package name, so fall back to the
	// real path when the location carries no virtual path.
	namePath := reader.VirtualPath
	if namePath == "" {
		namePath = reader.RealPath
	}

	for _, pattern := range c.EvidencePatterns {
		if !pattern.Match(contents) {
			continue
		}

		matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))

		// the first matching pattern wins; later patterns are not consulted
		return &pkg.Package{
			Name:      path.Base(namePath),
			Version:   matchMetadata["version"],
			Language:  pkg.Binary,
			Locations: source.NewLocationSet(reader.Location),
			Type:      pkg.BinaryPkg,
			// copy so that callers appending to the package's CPEs can never write into
			// the classifier's own backing array
			CPEs:         append([]pkg.CPE(nil), c.CPEs...),
			MetadataType: pkg.BinaryMetadataType,
			Metadata: pkg.BinaryMetadata{
				Classifier:  c.Package,
				RealPath:    reader.RealPath,
				VirtualPath: reader.VirtualPath,
			},
		}, nil, nil
	}

	return nil, nil, nil
}
// getContents reads and returns the entire contents of the file behind reader.
//
// The reader's ReadCloser is first converted with unionreader.GetUnionReader and the result is
// read to completion with io.ReadAll, so the whole file is held in memory. Failures from either
// step are wrapped (with %w) so callers can still inspect the underlying error.
func getContents(reader source.LocationReadCloser) ([]byte, error) {
	unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
	if err != nil {
		return nil, fmt.Errorf("unable to get union reader for file: %w", err)
	}

	contents, err := io.ReadAll(unionReader)
	if err != nil {
		return nil, fmt.Errorf("unable to get contents for file: %w", err)
	}

	return contents, nil
}

View file

@ -9,9 +9,11 @@ import (
"path"
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
@ -35,6 +37,11 @@ func NewJavascriptLockCataloger() *common.GenericCataloger {
return common.NewGenericCataloger(nil, globParsers, "javascript-lock-cataloger", addLicenses)
}
// NewNodeBinaryCataloger builds the generic cataloger named "node-binary-cataloger", which hands
// every file whose MIME type is listed in internal.ExecutableMIMETypeSet to parseNodeBinary.
func NewNodeBinaryCataloger() *generic.Cataloger {
	nodeCataloger := generic.NewCataloger("node-binary-cataloger")
	executableMIMETypes := internal.ExecutableMIMETypeSet.List()

	return nodeCataloger.WithParserByMimeTypes(parseNodeBinary, executableMIMETypes...)
}
func addLicenses(resolver source.FileResolver, location source.Location, p *pkg.Package) error {
dir := path.Dir(location.RealPath)
pkgPath := []string{dir, "node_modules"}

View file

@ -0,0 +1,43 @@
package javascript
import (
"regexp"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
// nodeClassifier describes how a node.js binary is recognised: by a file path ending in "node"
// and by file contents containing a "node.js/vX.Y.Z" version string.
var nodeClassifier = generic.Classifier{
Package: "node.js", // Note: this purposely matches the "node.js" string to aid nvd vuln matching
FilepathPatterns: []*regexp.Regexp{
// open question: should every file resolved with an executable MIME type be parsed instead of
// restricting candidates by path?
// matches a file named exactly "node", either at the root or in any directory
regexp.MustCompile(`(.*/|^)node$`),
},
EvidencePatterns: []*regexp.Regexp{
// matches "node.js/vX.Y.Z" and captures X.Y.Z in the "version" named group
regexp.MustCompile(`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
},
CPEs: []pkg.CPE{
// CPE for vendor "nodejs", product "node.js"; every other attribute, including the version, is a wildcard
pkg.MustCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"),
},
}
// parseNodeBinary is the generic-cataloger parser for node.js binaries. It runs nodeClassifier
// against the given file and, on a match, returns a single package marked as JavaScript.
//
// A classifier error (for example the path not matching, or unreadable contents) is logged at
// trace level and swallowed, because most executables are simply not node and reporting each
// one would be noise. When the classifier finds no evidence, nothing is returned. No
// relationships or errors are ever returned.
func parseNodeBinary(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	p, _, err := nodeClassifier.Examine(reader)
	if err != nil {
		log.Tracef("failed to find node.js package: %+v", err)
		return nil, nil, nil // we can silently fail here to reduce warning noise
	}

	// the path matched but no version evidence was found in the contents: nothing to report
	if p == nil {
		return nil, nil, nil
	}

	// TODO add node specific metadata to the packages to help with vulnerability matching
	p.Language = pkg.JavaScript
	// set the ID only after the final field mutation so it reflects the package as returned
	p.SetID()

	return []pkg.Package{*p}, nil, nil
}

View file

@ -24,6 +24,7 @@ const (
Swift Language = "swift"
CPP Language = "c++"
Haskell Language = "haskell"
Binary Language = "binary"
)
// AllLanguages is a set of all programming languages detected by syft.

View file

@ -13,6 +13,7 @@ const (
UnknownMetadataType MetadataType = "UnknownMetadata"
ApkMetadataType MetadataType = "ApkMetadata"
AlpmMetadataType MetadataType = "AlpmMetadata"
BinaryMetadataType MetadataType = "BinaryMetadata"
DpkgMetadataType MetadataType = "DpkgMetadata"
GemMetadataType MetadataType = "GemMetadata"
JavaMetadataType MetadataType = "JavaMetadata"
@ -35,6 +36,7 @@ const (
var AllMetadataTypes = []MetadataType{
ApkMetadataType,
AlpmMetadataType,
BinaryMetadataType,
DpkgMetadataType,
GemMetadataType,
JavaMetadataType,
@ -57,6 +59,7 @@ var AllMetadataTypes = []MetadataType{
var MetadataTypeByName = map[MetadataType]reflect.Type{
ApkMetadataType: reflect.TypeOf(ApkMetadata{}),
AlpmMetadataType: reflect.TypeOf(AlpmMetadata{}),
BinaryMetadataType: reflect.TypeOf(BinaryMetadata{}),
DpkgMetadataType: reflect.TypeOf(DpkgMetadata{}),
GemMetadataType: reflect.TypeOf(GemMetadata{}),
JavaMetadataType: reflect.TypeOf(JavaMetadata{}),

View file

@ -8,6 +8,7 @@ type Type string
const (
// the full set of supported packages
UnknownPkg Type = "UnknownPackage"
BinaryPkg Type = "binary"
ApkPkg Type = "apk"
AlpmPkg Type = "alpm"
GemPkg Type = "gem"
@ -33,6 +34,7 @@ const (
var AllPkgs = []Type{
ApkPkg,
AlpmPkg,
BinaryPkg,
GemPkg,
DebPkg,
RpmPkg,

View file

@ -87,10 +87,12 @@ func TestTypeFromPURL(t *testing.T) {
expectedTypes.Add(string(ty))
}
// testing microsoft packages and jenkins-plugins is not valid for purl at this time
// testing microsoft packages and jenkins-plugins and custom binary type
// is not valid for purl at this time
expectedTypes.Remove(string(KbPkg))
expectedTypes.Remove(string(JenkinsPluginPkg))
expectedTypes.Remove(string(PortagePkg))
expectedTypes.Remove(string(BinaryPkg))
for _, test := range tests {
t.Run(string(test.expected), func(t *testing.T) {

View file

@ -151,6 +151,7 @@ func TestPackageURL(t *testing.T) {
expectedTypes.Remove(string(DebPkg))
expectedTypes.Remove(string(GoModulePkg))
expectedTypes.Remove(string(HackagePkg))
expectedTypes.Remove(string(BinaryPkg))
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {

View file

@ -10,6 +10,7 @@ import (
func TestPackagesCmdFlags(t *testing.T) {
hiddenPackagesImage := "docker-archive:" + getFixtureImage(t, "image-hidden-packages")
coverageImage := "docker-archive:" + getFixtureImage(t, "image-pkg-coverage")
nodeBinaryImage := "docker-archive:" + getFixtureImage(t, "image-node-binary")
//badBinariesImage := "docker-archive:" + getFixtureImage(t, "image-bad-binaries")
tmp := t.TempDir() + "/"
@ -142,6 +143,15 @@ func TestPackagesCmdFlags(t *testing.T) {
assertSuccessfulReturnCode,
},
},
{
name: "catalog-node-js-binary",
args: []string{"packages", "-o", "json", nodeBinaryImage},
assertions: []traitAssertion{
assertJsonReport,
assertInOutput("node.js"),
assertSuccessfulReturnCode,
},
},
{
name: "responds-to-package-cataloger-search-options",
args: []string{"packages", "-vv"},

View file

@ -0,0 +1 @@
FROM node:19-alpine3.15

View file

@ -85,6 +85,7 @@ func TestPkgCoverageImage(t *testing.T) {
definedPkgs.Remove(string(pkg.CocoapodsPkg))
definedPkgs.Remove(string(pkg.ConanPkg))
definedPkgs.Remove(string(pkg.HackagePkg))
definedPkgs.Remove(string(pkg.BinaryPkg))
var cases []testCase
cases = append(cases, commonTestCases...)
@ -206,6 +207,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
observedLanguages.Remove(pkg.UnknownLanguage.String())
definedLanguages.Remove(pkg.UnknownLanguage.String())
observedPkgs.Remove(string(pkg.UnknownPkg))
definedPkgs.Remove(string(pkg.BinaryPkg))
definedPkgs.Remove(string(pkg.UnknownPkg))
// for directory scans we should not expect to see any of the following package types