fix: further improvements to CPE generation for apk packages (#1623)

* fix: consider upstream logic during apk cpe gen
* fix: correct apk CPE for go
* fix: correct apk CPE for ruby
* fix: correct apk CPE for bazel
* fix: correct apk CPE for clang
* fix: correct apk CPE for openjdk
* fix: correct apk CPE for glibc
* fix: correct apk CPE for glib
* fix: correct apk CPE for bash
* fix: correct apk CPE for alsa-lib
* fix: correct apk CPE for alsa
* fix: determine apk cpe vendor from known URLs
* fix: add more url prefix->vendor mappings for apk
* refactor: allow reuse of vendor by url prefix logic
* feat: extract username as vendor candidate from github/gitlab

Signed-off-by: Weston Steimel <weston.steimel@anchore.com>
This commit is contained in:
Weston Steimel 2023-02-27 18:16:04 +00:00 committed by GitHub
parent d23b4d4cbd
commit fbda21f4f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 326 additions and 57 deletions

View file

@ -13,21 +13,12 @@ var (
func pythonCandidateVendorsFromName(v string) fieldCandidateSet {
vendors := newFieldCandidateSet()
vendors.add(fieldCandidate{
value: v,
disallowSubSelections: true,
disallowDelimiterVariations: true,
})
vendors.addValue(v)
vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, pkg.PythonPkg, v, v)...)
vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, pkg.PythonPkg, v)...)
for _, av := range additionalVendorsForPython(v) {
vendors.add(fieldCandidate{
value: av,
disallowSubSelections: true,
disallowDelimiterVariations: true,
})
vendors.addValue(av)
vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, pkg.PythonPkg, av, av)...)
vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, pkg.PythonPkg, av)...)
}
@ -37,6 +28,7 @@ func pythonCandidateVendorsFromName(v string) fieldCandidateSet {
func pythonCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
vendors := newFieldCandidateSet()
upstream := m.Upstream()
for _, p := range pythonPrefixes {
if strings.HasPrefix(m.Package, p) {
@ -44,8 +36,8 @@ func pythonCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
vendors.union(pythonCandidateVendorsFromName(t))
}
if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) {
t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p))
if upstream != m.Package && strings.HasPrefix(upstream, p) {
t := strings.ToLower(strings.TrimPrefix(upstream, p))
vendors.union(pythonCandidateVendorsFromName(t))
}
}
@ -55,12 +47,7 @@ func pythonCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
func pythonCandidateProductsFromName(p string) fieldCandidateSet {
products := newFieldCandidateSet()
products.add(fieldCandidate{
value: p,
disallowSubSelections: true,
disallowDelimiterVariations: true,
})
products.addValue(p)
products.addValue(findAdditionalProducts(defaultCandidateAdditions, pkg.PythonPkg, p)...)
products.removeByValue(findProductsToRemove(defaultCandidateRemovals, pkg.PythonPkg, p)...)
return products
@ -68,6 +55,7 @@ func pythonCandidateProductsFromName(p string) fieldCandidateSet {
func pythonCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
products := newFieldCandidateSet()
upstream := m.Upstream()
for _, p := range pythonPrefixes {
if strings.HasPrefix(m.Package, p) {
@ -75,8 +63,8 @@ func pythonCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
products.union(pythonCandidateProductsFromName(t))
}
if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) {
t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p))
if upstream != m.Package && strings.HasPrefix(upstream, p) {
t := strings.ToLower(strings.TrimPrefix(upstream, p))
products.union(pythonCandidateProductsFromName(t))
}
}
@ -86,12 +74,7 @@ func pythonCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
func rubyCandidateVendorsFromName(v string) fieldCandidateSet {
vendors := newFieldCandidateSet()
vendors.add(fieldCandidate{
value: v,
disallowSubSelections: true,
disallowDelimiterVariations: true,
})
vendors.addValue(v)
vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, pkg.GemPkg, v, v)...)
vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, pkg.GemPkg, v)...)
return vendors
@ -99,16 +82,19 @@ func rubyCandidateVendorsFromName(v string) fieldCandidateSet {
func rubyCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
vendors := newFieldCandidateSet()
upstream := m.Upstream()
for _, p := range rubyPrefixes {
if strings.HasPrefix(m.Package, p) {
t := strings.ToLower(strings.TrimPrefix(m.Package, p))
vendors.union(rubyCandidateVendorsFromName(t))
}
if upstream != "ruby" {
for _, p := range rubyPrefixes {
if strings.HasPrefix(m.Package, p) {
t := strings.ToLower(strings.TrimPrefix(m.Package, p))
vendors.union(rubyCandidateVendorsFromName(t))
}
if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) {
t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p))
vendors.union(rubyCandidateVendorsFromName(t))
if upstream != "" && upstream != m.Package && strings.HasPrefix(upstream, p) {
t := strings.ToLower(strings.TrimPrefix(upstream, p))
vendors.union(rubyCandidateVendorsFromName(t))
}
}
}
@ -117,12 +103,7 @@ func rubyCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
func rubyCandidateProductsFromName(p string) fieldCandidateSet {
products := newFieldCandidateSet()
products.add(fieldCandidate{
value: p,
disallowSubSelections: true,
disallowDelimiterVariations: true,
})
products.addValue(p)
products.addValue(findAdditionalProducts(defaultCandidateAdditions, pkg.GemPkg, p)...)
products.removeByValue(findProductsToRemove(defaultCandidateRemovals, pkg.GemPkg, p)...)
return products
@ -130,22 +111,49 @@ func rubyCandidateProductsFromName(p string) fieldCandidateSet {
func rubyCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet {
products := newFieldCandidateSet()
upstream := m.Upstream()
for _, p := range rubyPrefixes {
if strings.HasPrefix(m.Package, p) {
t := strings.ToLower(strings.TrimPrefix(m.Package, p))
products.union(rubyCandidateProductsFromName(t))
}
if upstream != "ruby" {
for _, p := range rubyPrefixes {
if strings.HasPrefix(m.Package, p) {
t := strings.ToLower(strings.TrimPrefix(m.Package, p))
products.union(rubyCandidateProductsFromName(t))
}
if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) {
t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p))
products.union(rubyCandidateProductsFromName(t))
if upstream != "" && upstream != m.Package && strings.HasPrefix(upstream, p) {
t := strings.ToLower(strings.TrimPrefix(upstream, p))
products.union(rubyCandidateProductsFromName(t))
}
}
}
return products
}
// candidateVendorsFromAPKUpstream builds vendor candidates from the upstream
// name reported by the APK metadata.
//
// An empty set is returned when the upstream name is empty or equal to the
// package name. Otherwise the upstream name is added, followed by whatever
// findAdditionalVendors yields for ApkPkg, and finally the values reported by
// findVendorsToRemove are removed.
func candidateVendorsFromAPKUpstream(m pkg.ApkMetadata) fieldCandidateSet {
	candidates := newFieldCandidateSet()

	name := m.Upstream()
	if name == "" || name == m.Package {
		return candidates
	}

	candidates.addValue(name)

	extra := findAdditionalVendors(defaultCandidateAdditions, pkg.ApkPkg, name, name)
	candidates.addValue(extra...)

	unwanted := findVendorsToRemove(defaultCandidateRemovals, pkg.ApkPkg, name)
	candidates.removeByValue(unwanted...)

	return candidates
}
// candidateProductsFromAPKUpstream builds product candidates from the upstream
// name reported by the APK metadata.
//
// An empty set is returned when the upstream name is empty. Otherwise the
// upstream name is added (even when it equals the package name), followed by
// whatever findAdditionalProducts yields for ApkPkg, and finally the values
// reported by findProductsToRemove are removed.
func candidateProductsFromAPKUpstream(m pkg.ApkMetadata) fieldCandidateSet {
	candidates := newFieldCandidateSet()

	name := m.Upstream()
	if name == "" {
		return candidates
	}

	candidates.addValue(name)

	extra := findAdditionalProducts(defaultCandidateAdditions, pkg.ApkPkg, name)
	candidates.addValue(extra...)

	unwanted := findProductsToRemove(defaultCandidateRemovals, pkg.ApkPkg, name)
	candidates.removeByValue(unwanted...)

	return candidates
}
func candidateVendorsForAPK(p pkg.Package) fieldCandidateSet {
metadata, ok := p.Metadata.(pkg.ApkMetadata)
if !ok {
@ -155,6 +163,13 @@ func candidateVendorsForAPK(p pkg.Package) fieldCandidateSet {
vendors := newFieldCandidateSet()
vendors.union(pythonCandidateVendorsFromAPK(metadata))
vendors.union(rubyCandidateVendorsFromAPK(metadata))
vendors.union(candidateVendorsFromAPKUpstream(metadata))
vendors.union(candidateVendorsFromURL(metadata.URL))
for v := range vendors {
v.disallowDelimiterVariations = true
v.disallowSubSelections = true
}
return vendors
}
@ -168,6 +183,12 @@ func candidateProductsForAPK(p pkg.Package) fieldCandidateSet {
products := newFieldCandidateSet()
products.union(pythonCandidateProductsFromAPK(metadata))
products.union(rubyCandidateProductsFromAPK(metadata))
products.union(candidateProductsFromAPKUpstream(metadata))
for p := range products {
p.disallowDelimiterVariations = true
p.disallowSubSelections = true
}
return products
}

View file

@ -24,13 +24,14 @@ func Test_candidateVendorsForAPK(t *testing.T) {
expected: []string{"python-cryptography_project", "cryptography", "cryptographyproject", "cryptography_project"},
},
{
name: "py2-pypdf OriginPackage",
name: "py2-pypdf with explicit different origin",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
OriginPackage: "py2-pypdf",
Package: "py2-pypdf",
OriginPackage: "abcdefg",
},
},
expected: []string{"pypdf", "pypdfproject", "pypdf_project"},
expected: []string{"pypdf", "pypdfproject", "pypdf_project", "abcdefg"},
},
{
name: "ruby-armadillo Package",
@ -41,10 +42,39 @@ func Test_candidateVendorsForAPK(t *testing.T) {
},
expected: []string{"armadillo"},
},
{
name: "python-3.6",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
Package: "python-3.6",
},
},
expected: []string{"python", "python_software_foundation"},
},
{
name: "ruby-3.6",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
Package: "ruby-3.6",
URL: "https://www.ruby-lang.org/",
},
},
expected: []string{"ruby", "ruby-lang"},
},
{
name: "make",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
Package: "make",
URL: "https://www.gnu.org/software/make",
},
},
expected: []string{"gnu"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expected, candidateVendorsForAPK(test.pkg).values(), "different vendors")
assert.ElementsMatch(t, test.expected, candidateVendorsForAPK(test.pkg).uniqueValues(), "different vendors")
})
}
}
@ -65,13 +95,14 @@ func Test_candidateProductsForAPK(t *testing.T) {
expected: []string{"cryptography", "python-cryptography"},
},
{
name: "py2-pypdf OriginPackage",
name: "py2-pypdf with explicit different origin",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
OriginPackage: "py2-pypdf",
Package: "py2-pypdf",
OriginPackage: "abcdefg",
},
},
expected: []string{"pypdf"},
expected: []string{"pypdf", "abcdefg"},
},
{
name: "ruby-armadillo Package",
@ -82,10 +113,39 @@ func Test_candidateProductsForAPK(t *testing.T) {
},
expected: []string{"armadillo"},
},
{
name: "python-3.6",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
Package: "python-3.6",
},
},
expected: []string{"python"},
},
{
name: "ruby-3.6",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
Package: "ruby-3.6",
URL: "https://www.ruby-lang.org/",
},
},
expected: []string{"ruby"},
},
{
name: "make",
pkg: pkg.Package{
Metadata: pkg.ApkMetadata{
Package: "make",
URL: "https://www.gnu.org/software/make",
},
},
expected: []string{"make"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expected, candidateProductsForAPK(test.pkg).values(), "different products")
assert.ElementsMatch(t, test.expected, candidateProductsForAPK(test.pkg).uniqueValues(), "different products")
})
}
}

View file

@ -203,6 +203,77 @@ var defaultCandidateAdditions = buildCandidateLookup(
candidateKey{PkgName: "nodejs-current"},
candidateAddition{AdditionalProducts: []string{"node.js"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "go"},
candidateAddition{AdditionalVendors: []string{"golang"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "ruby"},
candidateAddition{AdditionalVendors: []string{"ruby-lang"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "bazel"},
candidateAddition{AdditionalVendors: []string{"google"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "clang"},
candidateAddition{AdditionalVendors: []string{"llvm"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "openjdk"},
candidateAddition{AdditionalVendors: []string{"oracle"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "glibc"},
candidateAddition{AdditionalVendors: []string{"gnu"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "glib"},
candidateAddition{AdditionalVendors: []string{"gnome"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "bash"},
candidateAddition{AdditionalVendors: []string{"gnu"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "alsa-lib"},
candidateAddition{AdditionalVendors: []string{"alsa-project"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "alsa"},
candidateAddition{AdditionalVendors: []string{"alsa-project"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "make"},
candidateAddition{AdditionalVendors: []string{"gnu"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "git"},
candidateAddition{AdditionalVendors: []string{"git-scm"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "bind"},
candidateAddition{AdditionalVendors: []string{"isc"}},
},
{
pkg.ApkPkg,
candidateKey{PkgName: "libxpm"},
candidateAddition{AdditionalVendors: []string{"libxpm_project"}},
},
//
// Binary packages
{
pkg.BinaryPkg,

View file

@ -0,0 +1,56 @@
package cpe
import (
"regexp"
"strings"
"github.com/anchore/syft/internal"
)
// Lookup tables used to derive CPE vendor candidates from a package URL.
var (
// urlPrefixToVendors maps a URL prefix to the vendor name(s) offered as
// candidates for any URL that starts with that prefix.
urlPrefixToVendors = map[string][]string{
"https://www.gnu.org/": {"gnu"},
"https://developer.gnome.org/": {"gnome"},
"https://www.ruby-lang.org/": {"ruby-lang"},
"https://llvm.org/": {"llvm"},
"https://www.isc.org/": {"isc"},
}
// vendorExtractionPatterns are regular expressions matched against a URL.
// Each one exposes a named capture group "vendor". The single pattern
// below captures the first path segment of https github.com and
// gitlab.com URLs (word characters and '-').
vendorExtractionPatterns = []*regexp.Regexp{
regexp.MustCompile(`^https://(?:github|gitlab)\.com/(?P<vendor>[\w\-]*?)/.*$`),
}
)
// candidateVendorsFromURL derives vendor candidates from a package URL.
//
// The URL is first compared against the prefixes in urlPrefixToVendors; when
// one matches, every vendor mapped to that prefix is returned. Otherwise each
// pattern in vendorExtractionPatterns is tried in order, and the value of the
// "vendor" capture group from the first pattern that yields one is returned.
// An empty set is returned when nothing matches.
//
// Every candidate is added with sub-selections and delimiter variations
// disallowed.
func candidateVendorsFromURL(url string) fieldCandidateSet {
	vendors := newFieldCandidateSet()

	for urlPrefix, additionalVendors := range urlPrefixToVendors {
		if !strings.HasPrefix(url, urlPrefix) {
			continue
		}

		for _, v := range additionalVendors {
			vendors.add(fieldCandidate{
				value:                       v,
				disallowSubSelections:       true,
				disallowDelimiterVariations: true,
			})
		}

		// Return only once the whole list has been added. Returning from
		// inside the loop above would silently drop every vendor after the
		// first for prefixes that map to more than one vendor. A prefix with
		// an empty vendor list does not short-circuit the remaining lookups.
		if len(additionalVendors) > 0 {
			return vendors
		}
	}

	for _, p := range vendorExtractionPatterns {
		groups := internal.MatchNamedCaptureGroups(p, url)
		if v, ok := groups["vendor"]; ok {
			vendors.add(fieldCandidate{
				value:                       v,
				disallowSubSelections:       true,
				disallowDelimiterVariations: true,
			})

			// The first pattern that produces a vendor wins.
			return vendors
		}
	}

	return vendors
}

View file

@ -0,0 +1,61 @@
package cpe
import (
"testing"
"github.com/stretchr/testify/assert"
)
func Test_candidateVendorsFromURL(t *testing.T) {
tests := []struct {
name string
url string
expected []string
}{
{
name: "empty",
url: "",
expected: []string{},
},
{
name: "no known vendors",
url: "https://something-unknown.com/126374623876/12345",
expected: []string{},
},
{
name: "gnu vendor from url",
url: "https://www.gnu.org/software/make",
expected: []string{"gnu"},
},
{
name: "github username as vendor",
url: "https://github.com/armadillo/abcxyz-12345",
expected: []string{"armadillo"},
},
{
name: "github username with - as vendor",
url: "https://github.com/1234-abc-xyz/hello",
expected: []string{"1234-abc-xyz"},
},
{
name: "gitlab username as vendor",
url: "https://gitlab.com/armadillo/abcxyz-12345",
expected: []string{"armadillo"},
},
{
name: "gitlab username with - as vendor",
url: "https://gitlab.com/1234-abc-xyz/hello",
expected: []string{"1234-abc-xyz"},
},
{
name: "github username as vendor from longer url",
url: "https://github.com/armadillo/abcxyz-12345/a/b/c/d/e/f/g",
expected: []string{"armadillo"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expected, candidateVendorsFromURL(test.url).uniqueValues(), "different vendors")
})
}
}