Enhance CPE generation for java GroupId and filtering (#402)

* enhance cpe generation for group id and filtering

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* rename group id const + add doc comment for HasAnyOfPrefixes

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-04-22 08:22:56 -04:00 committed by GitHub
parent 0c29090b42
commit d9de63c837
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 333 additions and 44 deletions

View file

@ -0,0 +1,14 @@
package internal
import "strings"
// HasAnyOfPrefixes reports whether input begins with at least one of the supplied prefixes.
// It returns false when no prefixes are supplied.
func HasAnyOfPrefixes(input string, prefixes ...string) bool {
	matched := false
	// stop scanning as soon as one prefix matches
	for i := 0; i < len(prefixes) && !matched; i++ {
		matched = strings.HasPrefix(input, prefixes[i])
	}
	return matched
}

View file

@ -0,0 +1,65 @@
package internal
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestHasAnyOfPrefixes(t *testing.T) {
tests := []struct {
name string
input string
prefixes []string
expected bool
}{
{
name: "go case",
input: "this has something",
prefixes: []string{
"this has",
"that does not have",
},
expected: true,
},
{
name: "no match",
input: "this has something",
prefixes: []string{
"this DOES NOT has",
"that does not have",
},
expected: false,
},
{
name: "empty",
input: "this has something",
prefixes: []string{},
expected: false,
},
{
name: "positive match last",
input: "this has something",
prefixes: []string{
"that does not have",
"this has",
},
expected: true,
},
{
name: "empty input",
input: "",
prefixes: []string{
"that does not have",
"this has",
},
expected: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, HasAnyOfPrefixes(test.input, test.prefixes...))
})
}
}

View file

@ -10,12 +10,6 @@ import (
"github.com/facebookincubator/nvdtools/wfn"
)
// this is functionally equivalent to "*" and consistent with no input given (thus easier to test)
const any = ""
// this is a static mapping of known package names (keys) to official cpe names for each package
type candidateStore map[pkg.Type]map[string][]string
var productCandidatesByPkgType = candidateStore{
pkg.JavaPkg: {
"springframework": []string{"spring_framework", "springsource_spring_framework"},
@ -42,6 +36,25 @@ var productCandidatesByPkgType = candidateStore{
},
}
// cpeFilters is the set of filter functions applied to generated CPEs; a filter returning true
// marks the CPE as one that should not be kept for the package.
var cpeFilters = []filterFn{
	// nolint: goconst
	func(cpe pkg.CPE, p pkg.Package) bool {
		// jira CPEs (under either the atlassian or jira vendor) should not apply to client packages
		if !strings.Contains(p.Name, "client") || cpe.Product != "jira" {
			return false
		}
		switch cpe.Vendor {
		case "atlassian", "jira":
			return true
		default:
			return false
		}
	},
}
// filterFn reports whether the given CPE should be removed from the candidate list for the
// given package (true means the CPE is filtered out).
type filterFn func(cpe pkg.CPE, p pkg.Package) bool
// candidateStore is a static mapping, grouped by package type, of known package names (keys)
// to the official cpe names for each package.
type candidateStore map[pkg.Type]map[string][]string
func (s candidateStore) getCandidates(t pkg.Type, key string) []string {
if _, ok := s[t]; !ok {
return nil
@ -65,6 +78,20 @@ func newCPE(product, vendor, version, targetSW string) wfn.Attributes {
return cpe
}
// filterCpes returns the CPEs from cpes for which none of the given filters report a match
// against package p. Input order is preserved; the result is nil when nothing is kept.
func filterCpes(cpes []pkg.CPE, p pkg.Package, filters ...filterFn) (result []pkg.CPE) {
	for i := range cpes {
		candidate := cpes[i]

		// a single matching filter is enough to drop the candidate
		rejected := false
		for _, shouldDrop := range filters {
			if shouldDrop(candidate, p) {
				rejected = true
				break
			}
		}

		if !rejected {
			result = append(result, candidate)
		}
	}
	return result
}
// generatePackageCPEs Create a list of CPEs, trying to guess the vendor, product tuple and setting TargetSoftware if possible
func generatePackageCPEs(p pkg.Package) []pkg.CPE {
targetSws := candidateTargetSoftwareAttrs(p)
@ -74,8 +101,8 @@ func generatePackageCPEs(p pkg.Package) []pkg.CPE {
keys := internal.NewStringSet()
cpes := make([]pkg.CPE, 0)
for _, product := range products {
for _, vendor := range append([]string{any}, vendors...) {
for _, targetSw := range append([]string{any}, targetSws...) {
for _, vendor := range append([]string{wfn.Any}, vendors...) {
for _, targetSw := range append([]string{wfn.Any}, targetSws...) {
// prevent duplicate entries...
key := fmt.Sprintf("%s|%s|%s|%s", product, vendor, p.Version, targetSw)
if keys.Contains(key) {
@ -90,6 +117,9 @@ func generatePackageCPEs(p pkg.Package) []pkg.CPE {
}
}
// filter out any known combinations that don't accurately represent this package
cpes = filterCpes(cpes, p, cpeFilters...)
sort.Sort(ByCPESpecificity(cpes))
return cpes
@ -157,6 +187,11 @@ func candidateProducts(p pkg.Package) []string {
func candidateProductsForJava(p pkg.Package) []string {
if product, _ := productAndVendorFromPomPropertiesGroupID(p); product != "" {
// ignore group ID info from a jenkins plugin, as using this info may imply that this package
// CPE belongs to the cloudbees org (or similar) which is wrong.
if p.Type == pkg.JenkinsPluginPkg && strings.ToLower(product) == "jenkins" {
return nil
}
return []string{product}
}
@ -177,7 +212,7 @@ func productAndVendorFromPomPropertiesGroupID(p pkg.Package) (string, string) {
return "", ""
}
if !hasAnyOfPrefixes(groupID, "com", "org") {
if !internal.HasAnyOfPrefixes(groupID, "com", "org") {
return "", ""
}
@ -209,26 +244,7 @@ func shouldConsiderGroupID(groupID string) bool {
return false
}
excludedGroupIDs := []string{
pkg.PomPropertiesGroupIDJiraPlugins,
pkg.PomPropertiesGroupIDJenkinsPlugins,
}
excludedGroupIDs := append([]string{pkg.JiraPluginPomPropertiesGroupID}, pkg.JenkinsPluginPomPropertiesGroupIDs...)
for _, excludedGroupID := range excludedGroupIDs {
if groupID == excludedGroupID {
return false
}
}
return true
}
func hasAnyOfPrefixes(input string, prefixes ...string) bool {
for _, prefix := range prefixes {
if strings.HasPrefix(input, prefix) {
return true
}
}
return false
return !internal.HasAnyOfPrefixes(groupID, excludedGroupIDs...)
}

View file

@ -220,7 +220,7 @@ func TestGeneratePackageCPEs(t *testing.T) {
},
},
{
name: "jenkins package identified via groupId",
name: "cloudbees jenkins package identified via groupId",
p: pkg.Package{
Name: "name",
Version: "3.2",
@ -242,6 +242,135 @@ func TestGeneratePackageCPEs(t *testing.T) {
"cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
},
},
{
name: "jenkins.io package identified via groupId prefix",
p: pkg.Package{
Name: "name",
Version: "3.2",
FoundBy: "some-analyzer",
Language: pkg.Java,
Type: pkg.JenkinsPluginPkg,
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
GroupID: "io.jenkins.plugins.name.something",
},
},
},
expected: []string{
"cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
},
},
{
name: "jenkins.io package identified via groupId",
p: pkg.Package{
Name: "name",
Version: "3.2",
FoundBy: "some-analyzer",
Language: pkg.Java,
Type: pkg.JenkinsPluginPkg,
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
GroupID: "io.jenkins.plugins",
},
},
},
expected: []string{
"cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
},
},
{
name: "jenkins-ci.io package identified via groupId",
p: pkg.Package{
Name: "name",
Version: "3.2",
FoundBy: "some-analyzer",
Language: pkg.Java,
Type: pkg.JenkinsPluginPkg,
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
GroupID: "io.jenkins-ci.plugins",
},
},
},
expected: []string{
"cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
},
},
{
name: "jenkins-ci.org package identified via groupId",
p: pkg.Package{
Name: "name",
Version: "3.2",
FoundBy: "some-analyzer",
Language: pkg.Java,
Type: pkg.JenkinsPluginPkg,
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
GroupID: "org.jenkins-ci.plugins",
},
},
},
expected: []string{
"cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*",
"cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*",
},
},
{
name: "jira-atlassian filtering",
p: pkg.Package{
Name: "jira_client_core",
Version: "3.2",
FoundBy: "some-analyzer",
Language: pkg.Java,
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
GroupID: "org.atlassian.jira",
ArtifactID: "jira_client_core",
},
},
},
expected: []string{
"cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:*:jira:3.2:*:*:*:*:java:*:*",
"cpe:2.3:a:*:jira:3.2:*:*:*:*:maven:*:*",
"cpe:2.3:a:*:jira_client_core:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:*:jira_client_core:3.2:*:*:*:*:java:*:*",
"cpe:2.3:a:*:jira_client_core:3.2:*:*:*:*:maven:*:*",
"cpe:2.3:a:atlassian:jira_client_core:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:atlassian:jira_client_core:3.2:*:*:*:*:java:*:*",
"cpe:2.3:a:atlassian:jira_client_core:3.2:*:*:*:*:maven:*:*",
"cpe:2.3:a:jira:jira_client_core:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:jira:jira_client_core:3.2:*:*:*:*:java:*:*",
"cpe:2.3:a:jira:jira_client_core:3.2:*:*:*:*:maven:*:*",
"cpe:2.3:a:jira_client_core:jira:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:jira_client_core:jira:3.2:*:*:*:*:java:*:*",
"cpe:2.3:a:jira_client_core:jira:3.2:*:*:*:*:maven:*:*",
"cpe:2.3:a:jira_client_core:jira_client_core:3.2:*:*:*:*:*:*:*",
"cpe:2.3:a:jira_client_core:jira_client_core:3.2:*:*:*:*:java:*:*",
"cpe:2.3:a:jira_client_core:jira_client_core:3.2:*:*:*:*:maven:*:*",
},
},
}
for _, test := range tests {

View file

@ -106,8 +106,6 @@ func (j *archiveParser) parse() ([]pkg.Package, error) {
// lastly, add the parent package to the list (assuming the parent exists)
if parentPkg != nil {
// only the parent package gets the type, nested packages may be of a different package type (or not of a package type at all, since they may not be bundled)
parentPkg.Type = j.fileInfo.pkgType()
pkgs = append([]pkg.Package{*parentPkg}, pkgs...)
}
@ -143,7 +141,7 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
Language: pkg.Java,
Type: pkg.JavaPkg,
Type: j.fileInfo.pkgType(),
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
VirtualPath: j.virtualPath,

View file

@ -1,6 +1,19 @@
package pkg
import "github.com/package-url/packageurl-go"
import (
"github.com/anchore/syft/internal"
"github.com/package-url/packageurl-go"
)
// JiraPluginPomPropertiesGroupID is the pom.properties group ID associated with Jira plugins.
const JiraPluginPomPropertiesGroupID = "com.atlassian.jira.plugins"
// JenkinsPluginPomPropertiesGroupIDs lists the pom.properties group ID prefixes that indicate a
// Jenkins plugin; a group ID starting with any of these is treated as JenkinsPluginPkg by
// PomProperties.PkgTypeIndicated.
var JenkinsPluginPomPropertiesGroupIDs = []string{
"io.jenkins.plugins",
"org.jenkins.plugins",
"org.jenkins-ci.plugins",
"io.jenkins-ci.plugins",
"com.cloudbees.jenkins.plugins",
}
// JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship.
type JavaMetadata struct {
@ -22,7 +35,7 @@ type PomProperties struct {
// PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties.
func (p PomProperties) PkgTypeIndicated() Type {
if p.GroupID == PomPropertiesGroupIDJenkinsPlugins {
if internal.HasAnyOfPrefixes(p.GroupID, JenkinsPluginPomPropertiesGroupIDs...) {
return JenkinsPluginPkg
}
@ -52,6 +65,3 @@ func (m JavaMetadata) PackageURL() string {
return ""
}
const PomPropertiesGroupIDJenkinsPlugins = "com.cloudbees.jenkins.plugins"
const PomPropertiesGroupIDJiraPlugins = "com.atlassian.jira.plugins"

View file

@ -1,9 +1,10 @@
package pkg
import (
"testing"
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
"testing"
)
func TestPomProperties_PkgTypeIndicated(t *testing.T) {
@ -24,7 +25,7 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) {
expectedType: JavaPkg,
},
{
name: "jenkins plugin",
name: "cloudbees jenkins plugin",
pomProperties: PomProperties{
Path: "some path",
Name: "some name",
@ -34,6 +35,61 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) {
},
expectedType: JenkinsPluginPkg,
},
{
name: "jenkins.io plugin",
pomProperties: PomProperties{
Path: "some path",
Name: "some name",
GroupID: "io.jenkins.plugins",
ArtifactID: "some artifact ID",
Version: "1",
},
expectedType: JenkinsPluginPkg,
},
{
name: "jenkins-ci.io plugin",
pomProperties: PomProperties{
Path: "some path",
Name: "some name",
GroupID: "io.jenkins-ci.plugins",
ArtifactID: "some artifact ID",
Version: "1",
},
expectedType: JenkinsPluginPkg,
},
{
name: "jenkins-ci.org plugin",
pomProperties: PomProperties{
Path: "some path",
Name: "some name",
GroupID: "org.jenkins-ci.plugins",
ArtifactID: "some artifact ID",
Version: "1",
},
expectedType: JenkinsPluginPkg,
},
{
name: "jenkins.org plugin",
pomProperties: PomProperties{
Path: "some path",
Name: "some name",
GroupID: "org.jenkins.plugins",
ArtifactID: "some artifact ID",
Version: "1",
},
expectedType: JenkinsPluginPkg,
},
{
name: "jenkins plugin prefix",
pomProperties: PomProperties{
Path: "some path",
Name: "some name",
GroupID: "com.cloudbees.jenkins.plugins.bluesteel",
ArtifactID: "some artifact ID",
Version: "1",
},
expectedType: JenkinsPluginPkg,
},
}
for _, tc := range cases {

View file

@ -6,6 +6,7 @@ type testCase struct {
name string
pkgType pkg.Type
pkgLanguage pkg.Language
duplicates int
pkgInfo map[string]string
}
@ -155,7 +156,6 @@ var commonTestCases = []testCase{
pkgLanguage: pkg.Java,
pkgInfo: map[string]string{
"example-java-app-maven": "0.1.0",
"example-jenkins-plugin": "1.0-SNAPSHOT", // the jenkins HPI file has a nested JAR of the same name
"joda-time": "2.9.2",
},
},
@ -163,6 +163,7 @@ var commonTestCases = []testCase{
name: "find jenkins plugins",
pkgType: pkg.JenkinsPluginPkg,
pkgLanguage: pkg.Java,
duplicates: 1, // there is a "example-jenkins-plugin" HPI, and nested within that a JAR of the same name
pkgInfo: map[string]string{
"example-jenkins-plugin": "1.0-SNAPSHOT",
},

View file

@ -96,7 +96,7 @@ func TestPkgCoverageImage(t *testing.T) {
pkgCount++
}
if pkgCount != len(c.pkgInfo) {
if pkgCount != len(c.pkgInfo)+c.duplicates {
t.Logf("Discovered packages of type %+v", c.pkgType)
for a := range catalog.Enumerate(c.pkgType) {
t.Log(" ", a)
@ -175,7 +175,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
actualPkgCount++
}
if actualPkgCount != len(test.pkgInfo) {
if actualPkgCount != len(test.pkgInfo)+test.duplicates {
for actualPkg := range catalog.Enumerate(test.pkgType) {
t.Log(" ", actualPkg)
}