Remove internal string set (#2219)

* remove internal string set

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* incorporate changes from #2227

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* beef up the pkg.License.Merge() doc string

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2023-10-17 12:52:11 -04:00 committed by GitHub
parent f3ad8cf250
commit 7018573bf7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 346 additions and 163 deletions

View file

@ -7,10 +7,10 @@ import (
"github.com/iancoleman/strcase"
"github.com/mitchellh/go-homedir"
"github.com/scylladb/go-set/strset"
"github.com/anchore/clio"
"github.com/anchore/fangs"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg/cataloger"
golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang"
@ -147,8 +147,8 @@ func (cfg Catalog) ToCatalogerConfig() cataloger.Config {
var validDefaultSourceValues = []string{"registry", "docker", "podman", ""}
func checkDefaultSourceValues(source string) error {
validValues := internal.NewStringSet(validDefaultSourceValues...)
if !validValues.Contains(source) {
validValues := strset.New(validDefaultSourceValues...)
if !validValues.Has(source) {
validValuesString := strings.Join(validDefaultSourceValues, ", ")
return fmt.Errorf("%s is not a valid default source; please use one of the following: %s''", source, validValuesString)
}

View file

@ -6,7 +6,8 @@ import (
"sort"
"strings"
"github.com/anchore/syft/internal"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log"
)
@ -40,7 +41,7 @@ func (z ZipFileManifest) Add(entry string, info os.FileInfo) {
// GlobMatch returns the path keys that match the given value(s).
func (z ZipFileManifest) GlobMatch(patterns ...string) []string {
uniqueMatches := internal.NewStringSet()
uniqueMatches := strset.New()
for _, pattern := range patterns {
for entry := range z {
@ -54,7 +55,7 @@ func (z ZipFileManifest) GlobMatch(patterns ...string) []string {
}
}
results := uniqueMatches.ToSlice()
results := uniqueMatches.List()
sort.Strings(results)
return results

View file

@ -1,20 +1,21 @@
package licenses
import "github.com/anchore/syft/internal"
// all of these taken from https://github.com/golang/pkgsite/blob/8996ff632abee854aef1b764ca0501f262f8f523/internal/licenses/licenses.go#L338
// which unfortunately is not exported. But fortunately is under BSD-style license. Take note that this list has
// been manually updated to include more license filenames (see https://github.com/anchore/syft/pull/2227).
var (
FileNames = []string{
func FileNames() []string {
return []string{
"AL2.0",
"COPYING",
"COPYING.md",
"COPYING.markdown",
"COPYING.txt",
"LGPL2.1",
"LICENCE",
"LICENCE.md",
"LICENCE.markdown",
"licence.txt",
"LICENCE.txt",
"LICENSE",
"LICENSE.md",
@ -49,9 +50,5 @@ var (
"MIT_LICENCE",
"UNLICENSE",
"UNLICENCE",
"AL2.0",
"LGPL2.1",
}
FileNameSet = internal.NewStringSet(FileNames...)
)
}

View file

@ -95,7 +95,7 @@ func separateLicenses(p pkg.Package) (spdx, other cyclonedx.Licenses, expression
// singular expression case
// only ID field here since we guarantee that the license is valid
if value, exists := spdxlicense.ID(l.SPDXExpression); exists {
if !l.URLs.Empty() {
if len(l.URLs) > 0 {
processLicenseURLs(l, value, &spdxc)
continue
}
@ -123,7 +123,7 @@ func separateLicenses(p pkg.Package) (spdx, other cyclonedx.Licenses, expression
// license string that are not valid spdx expressions or ids
// we only use license Name here since we cannot guarantee that the license is a valid SPDX expression
if !l.URLs.Empty() {
if len(l.URLs) > 0 {
processLicenseURLs(l, "", &otherc)
continue
}
@ -137,7 +137,7 @@ func separateLicenses(p pkg.Package) (spdx, other cyclonedx.Licenses, expression
}
func processLicenseURLs(l pkg.License, spdxID string, populate *cyclonedx.Licenses) {
for _, url := range l.URLs.ToSlice() {
for _, url := range l.URLs {
if spdxID == "" {
*populate = append(*populate, cyclonedx.LicenseChoice{
License: &cyclonedx.License{

View file

@ -4,9 +4,9 @@ import (
"testing"
"github.com/CycloneDX/cyclonedx-go"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
)
@ -191,7 +191,9 @@ func Test_encodeLicense(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, encodeLicenses(test.input))
if d := cmp.Diff(test.expected, encodeLicenses(test.input)); d != "" {
t.Errorf("unexpected license (-want +got):\n%s", d)
}
})
}
}
@ -223,7 +225,7 @@ func TestDecodeLicenses(t *testing.T) {
Value: "RandomLicense",
// CycloneDX specification doesn't give a field for determining the license type
Type: license.Declared,
URLs: internal.NewStringSet(),
URLs: []string{},
},
},
},
@ -243,7 +245,7 @@ func TestDecodeLicenses(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet(),
URLs: []string{},
},
},
},
@ -262,7 +264,7 @@ func TestDecodeLicenses(t *testing.T) {
Value: "MIT AND GPL-3.0-only WITH Classpath-exception-2.0",
SPDXExpression: "MIT AND GPL-3.0-only WITH Classpath-exception-2.0",
Type: license.Declared,
URLs: internal.NewStringSet(),
URLs: []string{},
},
},
},

View file

@ -12,7 +12,6 @@ import (
"github.com/stretchr/testify/require"
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/formats/internal/testutils"
@ -118,7 +117,7 @@ func Test_encodeDecodeFileMetadata(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: "MIT",
URLs: internal.NewStringSet("https://example.org/license"),
URLs: []string{"https://example.org/license"},
Locations: file.LocationSet{},
}),
Language: "language",

View file

@ -214,11 +214,18 @@ func toLicenseModel(pkgLicenses []pkg.License) (modelLicenses []model.License) {
if v := l.Locations.ToSlice(); v != nil {
locations = v
}
// format model must have allocated collections
urls := l.URLs
if urls == nil {
urls = []string{}
}
modelLicenses = append(modelLicenses, model.License{
Value: l.Value,
SPDXExpression: l.SPDXExpression,
Type: l.Type,
URLs: l.URLs.ToSlice(),
URLs: urls,
Locations: locations,
})
}

View file

@ -10,7 +10,6 @@ import (
"github.com/google/go-cmp/cmp"
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cpe"
@ -136,7 +135,7 @@ func toSyftLicenses(m []model.License) (p []pkg.License) {
Value: l.Value,
SPDXExpression: l.SPDXExpression,
Type: l.Type,
URLs: internal.NewStringSet(l.URLs...),
URLs: l.URLs,
Locations: file.NewLocationSet(l.Locations...),
})
}

View file

@ -4,8 +4,8 @@ import (
"sync"
"github.com/jinzhu/copier"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
)
@ -124,13 +124,13 @@ func (c *Collection) addTypeToIndex(p Package) {
}
func (c *Collection) addPathsToIndex(p Package) {
observedPaths := internal.NewStringSet()
observedPaths := strset.New()
for _, l := range p.Locations.ToSlice() {
if l.RealPath != "" && !observedPaths.Contains(l.RealPath) {
if l.RealPath != "" && !observedPaths.Has(l.RealPath) {
c.addPathToIndex(p.id, l.RealPath)
observedPaths.Add(l.RealPath)
}
if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Contains(l.VirtualPath) {
if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Has(l.VirtualPath) {
c.addPathToIndex(p.id, l.VirtualPath)
observedPaths.Add(l.VirtualPath)
}
@ -173,13 +173,13 @@ func (c *Collection) deleteTypeFromIndex(p Package) {
}
func (c *Collection) deletePathsFromIndex(p Package) {
observedPaths := internal.NewStringSet()
observedPaths := strset.New()
for _, l := range p.Locations.ToSlice() {
if l.RealPath != "" && !observedPaths.Contains(l.RealPath) {
if l.RealPath != "" && !observedPaths.Has(l.RealPath) {
c.deletePathFromIndex(p.id, l.RealPath)
observedPaths.Add(l.RealPath)
}
if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Contains(l.VirtualPath) {
if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Has(l.VirtualPath) {
c.deletePathFromIndex(p.id, l.VirtualPath)
observedPaths.Add(l.VirtualPath)
}

View file

@ -13,7 +13,6 @@ import (
"github.com/facebookincubator/nvdtools/wfn"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
@ -118,13 +117,13 @@ func Generate(p pkg.Package) []cpe.CPE {
return nil
}
keys := internal.NewStringSet()
keys := strset.New()
cpes := make([]cpe.CPE, 0)
for _, product := range products {
for _, vendor := range vendors {
// prevent duplicate entries...
key := fmt.Sprintf("%s|%s|%s", product, vendor, p.Version)
if keys.Contains(key) {
if keys.Has(key) {
continue
}
keys.Add(key)

View file

@ -7,6 +7,8 @@ import (
"sort"
"strings"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
)
@ -18,7 +20,7 @@ var (
)
func parseLicensesFromCopyright(reader io.Reader) []string {
findings := internal.NewStringSet()
findings := strset.New()
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
@ -31,7 +33,7 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
}
}
results := findings.ToSlice()
results := findings.List()
sort.Strings(results)

View file

@ -18,6 +18,7 @@ import (
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/storage/memory"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
@ -31,6 +32,7 @@ type goLicenses struct {
opts GoCatalogerOpts
localModCacheResolver file.WritableResolver
progress *monitor.CatalogerTask
licenseFileNames *strset.Set
}
func newGoLicenses(opts GoCatalogerOpts) goLicenses {
@ -42,6 +44,7 @@ func newGoLicenses(opts GoCatalogerOpts) goLicenses {
RemoveOnCompletion: true,
Title: "Downloading go mod",
},
licenseFileNames: strset.New(licenses.FileNames()...),
}
}
@ -77,7 +80,7 @@ func modCacheResolver(modCacheDir string) file.WritableResolver {
}
func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) {
licenses, err = findLicenses(resolver,
licenses, err = c.findLicenses(resolver,
fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
)
if err != nil || len(licenses) > 0 {
@ -102,7 +105,7 @@ func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]p
// if we're running against a directory on the filesystem, it may not include the
// user's homedir / GOPATH, so we defer to using the localModCacheResolver
return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
}
func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) {
@ -139,7 +142,37 @@ func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]
log.Tracef("remote proxy walk failed for: %s", moduleName)
}
return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
}
// findLicenses asks the resolver for every file matching globMatch and parses each match whose base
// name is present in c.licenseFileNames, returning the accumulated licenses.
//
// A nil resolver yields an empty (non-nil) slice and no error. Any error from globbing, from fetching
// file contents, or from parsing aborts the search and is returned with a nil slice.
//
// NOTE(review): the value returned by FileContentsByLocation is not closed in this function; confirm
// whether licenses.Parse takes ownership of it.
func (c *goLicenses) findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) {
	out = make([]pkg.License, 0)
	if resolver == nil {
		return out, nil
	}

	matches, err := resolver.FilesByGlob(globMatch)
	if err != nil {
		return nil, err
	}

	for _, location := range matches {
		// only files named like a known license file are worth parsing
		if !c.licenseFileNames.Has(path.Base(location.RealPath)) {
			continue
		}

		reader, readErr := resolver.FileContentsByLocation(location)
		if readErr != nil {
			return nil, readErr
		}

		found, parseErr := licenses.Parse(reader, location)
		if parseErr != nil {
			return nil, parseErr
		}

		out = append(out, found...)
	}

	return out, nil
}
func moduleDir(moduleName, moduleVersion string) string {
@ -157,36 +190,6 @@ func requireCollection(licenses []pkg.License) []pkg.License {
return licenses
}
// findLicenses is the package-level predecessor of (*goLicenses).findLicenses that this commit removes.
// It globs the resolver with globMatch and parses every match whose base name is contained in the
// package-level licenses.FileNameSet. A nil resolver yields an empty (non-nil) slice and no error.
func findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) {
out = make([]pkg.License, 0)
if resolver == nil {
return
}
locations, err := resolver.FilesByGlob(globMatch)
if err != nil {
return nil, err
}
for _, l := range locations {
// match on the file's base name only; the directory portion is ignored
fileName := path.Base(l.RealPath)
if licenses.FileNameSet.Contains(fileName) {
contents, err := resolver.FileContentsByLocation(l)
if err != nil {
return nil, err
}
parsed, err := licenses.Parse(contents, l)
if err != nil {
return nil, err
}
out = append(out, parsed...)
}
}
// naked return: out holds the accumulated licenses and err is the (nil) glob error
return
}
var capReplacer = regexp.MustCompile("[A-Z]")
func processCaps(s string) string {

View file

@ -13,7 +13,6 @@ import (
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/license"
@ -37,7 +36,6 @@ func Test_LocalLicenseSearch(t *testing.T) {
SPDXExpression: "Apache-2.0",
Type: license.Concluded,
Locations: file.NewLocationSet(loc1),
URLs: internal.NewStringSet(),
},
},
{
@ -48,7 +46,6 @@ func Test_LocalLicenseSearch(t *testing.T) {
SPDXExpression: "MIT",
Type: license.Concluded,
Locations: file.NewLocationSet(loc2),
URLs: internal.NewStringSet(),
},
},
}
@ -128,7 +125,6 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
SPDXExpression: "Apache-2.0",
Type: license.Concluded,
Locations: file.NewLocationSet(loc1),
URLs: internal.NewStringSet(),
},
},
{
@ -139,7 +135,6 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
SPDXExpression: "MIT",
Type: license.Concluded,
Locations: file.NewLocationSet(loc2),
URLs: internal.NewStringSet(),
},
},
}

View file

@ -332,7 +332,7 @@ func getDigestsFromArchive(archivePath string) ([]file.Digest, error) {
func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) {
var fileLicenses []pkg.License
for _, filename := range licenses.FileNames {
for _, filename := range licenses.FileNames() {
licenseMatches := j.fileManifest.GlobMatch("/META-INF/" + filename)
if len(licenseMatches) == 0 {
// Try the root directory if it's not in META-INF

View file

@ -12,9 +12,9 @@ import (
"testing"
"github.com/gookit/color"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
@ -163,7 +163,6 @@ func TestParseJar(t *testing.T) {
Value: "Apache-2.0",
SPDXExpression: "Apache-2.0",
Type: license.Concluded,
URLs: internal.NewStringSet(),
Locations: file.NewLocationSet(file.NewLocation("test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar")),
},
),
@ -237,7 +236,6 @@ func TestParseJar(t *testing.T) {
Value: "Apache-2.0",
SPDXExpression: "Apache-2.0",
Type: license.Concluded,
URLs: internal.NewStringSet(),
Locations: file.NewLocationSet(file.NewLocation("test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar")),
},
),
@ -594,7 +592,7 @@ func TestParseNestedJar(t *testing.T) {
})
require.NoError(t, err)
expectedNameVersionPairSet := internal.NewStringSet()
expectedNameVersionPairSet := strset.New()
makeKey := func(p *pkg.Package) string {
if p == nil {
@ -607,24 +605,24 @@ func TestParseNestedJar(t *testing.T) {
expectedNameVersionPairSet.Add(makeKey(&e))
}
actualNameVersionPairSet := internal.NewStringSet()
actualNameVersionPairSet := strset.New()
for _, a := range actual {
a := a
key := makeKey(&a)
actualNameVersionPairSet.Add(key)
if !expectedNameVersionPairSet.Contains(key) {
if !expectedNameVersionPairSet.Has(key) {
t.Errorf("extra package: %s", a)
}
}
for _, key := range expectedNameVersionPairSet.ToSlice() {
if !actualNameVersionPairSet.Contains(key) {
for _, key := range expectedNameVersionPairSet.List() {
if !actualNameVersionPairSet.Has(key) {
t.Errorf("missing package: %s", key)
}
}
if len(actual) != len(expectedNameVersionPairSet) {
t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedNameVersionPairSet))
if len(actual) != expectedNameVersionPairSet.Size() {
t.Fatalf("unexpected package count: %d!=%d", len(actual), expectedNameVersionPairSet.Size())
}
for _, a := range actual {
@ -748,7 +746,7 @@ func Test_newPackageFromMavenData(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet("https://opensource.org/licenses/MIT"),
URLs: []string{"https://opensource.org/licenses/MIT"},
Locations: file.NewLocationSet(file.NewLocation("some-license-path")),
},
},
@ -785,7 +783,7 @@ func Test_newPackageFromMavenData(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet("https://opensource.org/licenses/MIT"),
URLs: []string{"https://opensource.org/licenses/MIT"},
Locations: file.NewLocationSet(file.NewLocation("some-license-path")),
},
),

View file

@ -11,7 +11,6 @@ import (
"github.com/stretchr/testify/require"
"github.com/vifraa/gopom"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
@ -342,19 +341,18 @@ func Test_parsePomXMLProject(t *testing.T) {
Value: "The Apache Software License, Version 2.0",
SPDXExpression: "", // TODO: ideally we would parse this title to get Apache-2.0 (created issue #2210 https://github.com/anchore/syft/issues/2210)
Type: license.Declared,
URLs: internal.NewStringSet("http://www.apache.org/licenses/LICENSE-2.0.txt"),
URLs: []string{"http://www.apache.org/licenses/LICENSE-2.0.txt"},
Locations: file.NewLocationSet(jarLocation),
},
{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet(),
Locations: file.NewLocationSet(jarLocation),
},
{
Type: license.Declared,
URLs: internal.NewStringSet("https://opensource.org/license/unlicense/"),
URLs: []string{"https://opensource.org/license/unlicense/"},
Locations: file.NewLocationSet(jarLocation),
},
},

View file

@ -5,7 +5,8 @@ import (
"fmt"
"regexp"
"github.com/anchore/syft/internal"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -51,7 +52,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
var pkgs []pkg.Package
scanner := bufio.NewScanner(reader)
parsedPackages := internal.NewStringSet()
parsedPackages := strset.New()
currentPackage := noPackage
currentVersion := noVersion
@ -60,7 +61,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
if packageName := findPackageName(line); packageName != noPackage {
// When we find a new package, check if we have unsaved identifiers
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Contains(currentPackage+"@"+currentVersion) {
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, currentPackage, currentVersion))
parsedPackages.Add(currentPackage + "@" + currentVersion)
}
@ -68,7 +69,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
currentPackage = packageName
} else if version := findPackageVersion(line); version != noVersion {
currentVersion = version
} else if packageName, version := findPackageAndVersion(line); packageName != noPackage && version != noVersion && !parsedPackages.Contains(packageName+"@"+version) {
} else if packageName, version := findPackageAndVersion(line); packageName != noPackage && version != noVersion && !parsedPackages.Has(packageName+"@"+version) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, packageName, version))
parsedPackages.Add(packageName + "@" + version)
@ -79,7 +80,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
}
// check if we have valid unsaved data after end-of-file has reached
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Contains(currentPackage+"@"+currentVersion) {
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, currentPackage, currentVersion))
parsedPackages.Add(currentPackage + "@" + currentVersion)
}

View file

@ -9,7 +9,8 @@ import (
"strconv"
"strings"
"github.com/anchore/syft/internal"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -105,7 +106,7 @@ func addLicenses(resolver file.Resolver, dbLocation file.Location, p *pkg.Packag
return
}
findings := internal.NewStringSet()
findings := strset.New()
scanner := bufio.NewScanner(licenseReader)
scanner.Split(bufio.ScanWords)
for scanner.Scan() {
@ -115,7 +116,7 @@ func addLicenses(resolver file.Resolver, dbLocation file.Location, p *pkg.Packag
}
}
licenseCandidates := findings.ToSlice()
licenseCandidates := findings.List()
p.Licenses = pkg.NewLicenseSet(pkg.NewLicensesFromLocation(*location, licenseCandidates...)...)
p.Locations.Add(location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
}

View file

@ -4,7 +4,8 @@ import (
"bufio"
"strings"
"github.com/anchore/syft/internal"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -13,7 +14,7 @@ import (
var _ generic.Parser = parseGemFileLockEntries
var sectionsOfInterest = internal.NewStringSet("GEM", "GIT", "PATH", "PLUGIN SOURCE")
var sectionsOfInterest = strset.New("GEM", "GIT", "PATH", "PLUGIN SOURCE")
// parseGemFileLockEntries is a parser function for Gemfile.lock contents, returning all Gems discovered.
func parseGemFileLockEntries(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
@ -30,7 +31,7 @@ func parseGemFileLockEntries(_ file.Resolver, _ *generic.Environment, reader fil
// start of section
currentSection = sanitizedLine
continue
} else if !sectionsOfInterest.Contains(currentSection) {
} else if !sectionsOfInterest.Has(currentSection) {
// skip this line, we're in the wrong section
continue
}

View file

@ -4,7 +4,8 @@ import (
"fmt"
"sort"
"github.com/anchore/syft/internal"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -23,11 +24,11 @@ var _ sort.Interface = (*Licenses)(nil)
// this is different for licenses since we're only looking for evidence
// of where a license was declared/concluded for a given package
type License struct {
Value string `json:"value"`
SPDXExpression string `json:"spdxExpression"`
Type license.Type `json:"type"`
URLs internal.StringSet `hash:"ignore"`
Locations file.LocationSet `hash:"ignore"`
Value string
SPDXExpression string
Type license.Type
URLs []string `hash:"ignore"`
Locations file.LocationSet `hash:"ignore"`
}
type Licenses []License
@ -77,7 +78,6 @@ func NewLicenseFromType(value string, t license.Type) License {
Value: value,
SPDXExpression: spdxExpression,
Type: t,
URLs: internal.NewStringSet(),
Locations: file.NewLocationSet(),
}
}
@ -109,11 +109,16 @@ func NewLicenseFromLocations(value string, locations ...file.Location) License {
func NewLicenseFromURLs(value string, urls ...string) License {
l := NewLicense(value)
for _, u := range urls {
if u != "" {
l.URLs.Add(u)
s := strset.New()
for _, url := range urls {
if url != "" {
s.Add(url)
}
}
l.URLs = s.List()
sort.Strings(l.URLs)
return l
}
@ -123,12 +128,15 @@ func NewLicenseFromFields(value, url string, location *file.Location) License {
l.Locations.Add(*location)
}
if url != "" {
l.URLs.Add(url)
l.URLs = append(l.URLs, url)
}
return l
}
// this is a bit of a hack to not infinitely recurse when hashing a license
// Merge two licenses into a new license object. If the merge is not possible due to unmergeable fields
// (e.g. different values for Value, SPDXExpression, Type, or any non-collection type) an error is returned.
// TODO: this is a bit of a hack to not infinitely recurse when hashing a license
func (s License) Merge(l License) (*License, error) {
sHash, err := artifact.IDByHash(s)
if err != nil {
@ -145,11 +153,24 @@ func (s License) Merge(l License) (*License, error) {
return nil, fmt.Errorf("cannot merge licenses with different hash")
}
s.URLs.Add(l.URLs.ToSlice()...)
if s.Locations.Empty() && l.Locations.Empty() {
// try to keep s.URLs unallocated unless necessary (which is the default state from the constructor)
if len(l.URLs) > 0 {
s.URLs = append(s.URLs, l.URLs...)
}
if len(s.URLs) > 0 {
s.URLs = strset.New(s.URLs...).List()
sort.Strings(s.URLs)
}
if l.Locations.Empty() {
return &s, nil
}
s.Locations.Add(l.Locations.ToSlice()...)
// since the set instance has a reference type (map) we must make a new instance
locations := file.NewLocationSet(s.Locations.ToSlice()...)
locations.Add(l.Locations.ToSlice()...)
s.Locations = locations
return &s, nil
}

View file

@ -3,9 +3,8 @@ package pkg
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/google/go-cmp/cmp"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
)
@ -97,7 +96,7 @@ func TestLicenseSet_Add(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet("https://example.com"),
URLs: []string{"https://example.com"},
Locations: file.NewLocationSet(file.NewLocation("/place")),
},
},
@ -115,14 +114,13 @@ func TestLicenseSet_Add(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Concluded,
URLs: internal.NewStringSet(),
Locations: file.NewLocationSet(),
},
{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet("https://example.com"),
URLs: []string{"https://example.com"},
Locations: file.NewLocationSet(file.NewLocation("/place")),
},
},
@ -133,7 +131,32 @@ func TestLicenseSet_Add(t *testing.T) {
s := NewLicenseSet()
s.Add(tt.licenses...)
testMe := s.ToSlice()
assert.Equal(t, tt.want, testMe)
if d := cmp.Diff(tt.want, testMe, cmp.Comparer(defaultLicenseComparer)); d != "" {
t.Errorf("unexpected license set (-want +got):\n%s", d)
}
})
}
}
// defaultLocationComparer reports whether two locations agree on both their Coordinates and their
// VirtualPath; no other location fields are consulted.
func defaultLocationComparer(x, y file.Location) bool {
	if !cmp.Equal(x.Coordinates, y.Coordinates) {
		return false
	}
	return cmp.Equal(x.VirtualPath, y.VirtualPath)
}
// defaultLicenseComparer reports whether two licenses are equal according to cmp.Equal, with
// individual locations compared by defaultLocationComparer and location sets compared element by
// element over their ToSlice output.
func defaultLicenseComparer(x, y License) bool {
	// two location sets match when their slices have the same length and each pair of elements at the
	// same index satisfies defaultLocationComparer
	locationSetsMatch := func(x, y file.LocationSet) bool {
		left, right := x.ToSlice(), y.ToSlice()
		if len(left) != len(right) {
			return false
		}
		for i := range left {
			if !defaultLocationComparer(left[i], right[i]) {
				return false
			}
		}
		return true
	}

	return cmp.Equal(
		x, y,
		cmp.Comparer(defaultLocationComparer),
		cmp.Comparer(locationSetsMatch),
	)
}

View file

@ -9,6 +9,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
)
func Test_Hash(t *testing.T) {
@ -18,11 +19,8 @@ func Test_Hash(t *testing.T) {
loc2 := file.NewLocation("place!")
loc2.FileSystemID = "fs2" // important! there is a different file system ID
lic1 := NewLicenseFromLocations("MIT", loc1)
lic2 := NewLicenseFromLocations("MIT", loc2)
lic1.URLs.Add("foo")
lic2.URLs.Add("bar") // we also want to check the URLs are ignored
lic1 := NewLicenseFromFields("MIT", "foo", &loc1)
lic2 := NewLicenseFromFields("MIT", "bar", &loc2)
hash1, err := artifact.IDByHash(lic1)
require.NoError(t, err)
@ -97,3 +95,134 @@ func Test_Sort(t *testing.T) {
}
}
// TestLicense_Merge is a table-driven test for License.Merge. Each case merges `other` into `subject`
// and either compares the result against `want` or expects an error via `wantErr`. After a successful
// merge it also asserts that the subject's Locations and URLs still have their original lengths.
func TestLicense_Merge(t *testing.T) {
locA := file.NewLocation("a")
locB := file.NewLocation("b")
tests := []struct {
name string
subject License
other License
want License
// wantErr defaults to require.NoError when left nil (see the run loop below)
wantErr require.ErrorAssertionFunc
}{
{
// identical Value/SPDXExpression/Type: URLs and Locations from both sides are expected in the result
name: "valid merge",
subject: License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
// listed out of order; the expected result below has them in sorted order
URLs: []string{
"b", "a",
},
Locations: file.NewLocationSet(locA),
},
other: License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: []string{
"c", "d",
},
Locations: file.NewLocationSet(locB),
},
want: License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: []string{
"a", "b", "c", "d",
},
Locations: file.NewLocationSet(locA, locB),
},
},
{
// only Value differs between subject and other
name: "mismatched value",
subject: License{
Value: "DIFFERENT!!",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: []string{
"b", "a",
},
Locations: file.NewLocationSet(locA),
},
other: License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: []string{
"c", "d",
},
Locations: file.NewLocationSet(locB),
},
wantErr: require.Error,
},
{
// only SPDXExpression differs between subject and other
name: "mismatched spdx expression",
subject: License{
Value: "MIT",
SPDXExpression: "DIFFERENT!!",
Type: license.Declared,
URLs: []string{
"b", "a",
},
Locations: file.NewLocationSet(locA),
},
other: License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: []string{
"c", "d",
},
Locations: file.NewLocationSet(locB),
},
wantErr: require.Error,
},
{
// only Type differs between subject (Concluded) and other (Declared)
name: "mismatched type",
subject: License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Concluded,
URLs: []string{
"b", "a",
},
Locations: file.NewLocationSet(locA),
},
other: License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: []string{
"c", "d",
},
Locations: file.NewLocationSet(locB),
},
wantErr: require.Error,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.wantErr == nil {
tt.wantErr = require.NoError
}
// capture the subject's collection sizes before merging so mutation can be detected afterwards
subjectLocationLen := len(tt.subject.Locations.ToSlice())
subjectURLLen := len(tt.subject.URLs)
got, err := tt.subject.Merge(tt.other)
tt.wantErr(t, err)
if err != nil {
// error cases have no result to compare
return
}
require.NotNilf(t, got, "expected a non-nil license")
assert.Equal(t, tt.want, *got)
// prove we don't modify the subject
assert.Equal(t, subjectLocationLen, len(tt.subject.Locations.ToSlice()))
assert.Equal(t, subjectURLLen, len(tt.subject.URLs))
})
}
}

View file

@ -4,13 +4,11 @@ import (
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger"
@ -59,8 +57,8 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
func TestPkgCoverageImage(t *testing.T) {
sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope, nil)
observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet()
observedLanguages := strset.New()
definedLanguages := strset.New()
for _, l := range pkg.AllLanguages {
definedLanguages.Add(l.String())
}
@ -76,8 +74,8 @@ func TestPkgCoverageImage(t *testing.T) {
definedLanguages.Remove(pkg.Erlang.String())
definedLanguages.Remove(pkg.Elixir.String())
observedPkgs := internal.NewStringSet()
definedPkgs := internal.NewStringSet()
observedPkgs := strset.New()
definedPkgs := strset.New()
for _, p := range pkg.AllPkgs {
definedPkgs.Add(string(p))
}
@ -148,29 +146,37 @@ func TestPkgCoverageImage(t *testing.T) {
observedPkgs.Remove(string(pkg.UnknownPkg))
definedPkgs.Remove(string(pkg.UnknownPkg))
missingLang := strset.Difference(definedLanguages, observedLanguages)
extraLang := strset.Difference(observedLanguages, definedLanguages)
// ensure that integration test cases stay in sync with the available catalogers
if diff := cmp.Diff(definedLanguages, observedLanguages); diff != "" {
t.Errorf("language coverage incomplete (languages=%d, coverage=%d)", len(definedLanguages), len(observedLanguages))
t.Errorf("definedLanguages mismatch observedLanguages (-want +got):\n%s", diff)
if missingLang.Size() > 0 || extraLang.Size() > 0 {
t.Errorf("language coverage incomplete (languages=%d, coverage=%d)", definedLanguages.Size(), observedLanguages.Size())
t.Errorf("unexpected languages: %s", extraLang.List())
t.Errorf("missing languages: %s", missingLang.List())
}
if diff := cmp.Diff(definedPkgs, observedPkgs); diff != "" {
t.Errorf("package coverage incomplete (packages=%d, coverage=%d)", len(definedPkgs), len(observedPkgs))
t.Errorf("definedPkgs mismatch observedPkgs (-want +got):\n%s", diff)
missingPkgs := strset.Difference(definedPkgs, observedPkgs)
extraPkgs := strset.Difference(observedPkgs, definedPkgs)
if missingPkgs.Size() > 0 || extraPkgs.Size() > 0 {
t.Errorf("package coverage incomplete (packages=%d, coverage=%d)", definedPkgs.Size(), observedPkgs.Size())
t.Errorf("unexpected packages: %s", extraPkgs.List())
t.Errorf("missing packages: %s", missingPkgs.List())
}
}
func TestPkgCoverageDirectory(t *testing.T) {
sbom, _ := catalogDirectory(t, "test-fixtures/image-pkg-coverage")
observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet()
observedLanguages := strset.New()
definedLanguages := strset.New()
for _, l := range pkg.AllLanguages {
definedLanguages.Add(l.String())
}
observedPkgs := internal.NewStringSet()
definedPkgs := internal.NewStringSet()
observedPkgs := strset.New()
definedPkgs := strset.New()
for _, p := range pkg.AllPkgs {
definedPkgs.Add(string(p))
}
@ -237,12 +243,12 @@ func TestPkgCoverageDirectory(t *testing.T) {
definedPkgs.Remove(string(pkg.KbPkg))
// ensure that integration test commonTestCases stay in sync with the available catalogers
if len(observedLanguages) < len(definedLanguages) {
t.Errorf("language coverage incomplete (languages=%d, coverage=%d)", len(definedLanguages), len(observedLanguages))
if observedLanguages.Size() < definedLanguages.Size() {
t.Errorf("language coverage incomplete (languages=%d, coverage=%d)", definedLanguages.Size(), observedLanguages.Size())
}
if len(observedPkgs) < len(definedPkgs) {
t.Errorf("package coverage incomplete (packages=%d, coverage=%d)", len(definedPkgs), len(observedPkgs))
if observedPkgs.Size() < definedPkgs.Size() {
t.Errorf("package coverage incomplete (packages=%d, coverage=%d)", definedPkgs.Size(), observedPkgs.Size())
}
}
@ -251,8 +257,8 @@ func TestPkgCoverageCatalogerConfiguration(t *testing.T) {
// for which that cataloger isn't enabled by default
sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope, []string{"rust"})
observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet()
observedLanguages := strset.New()
definedLanguages := strset.New()
definedLanguages.Add("rust")
for actualPkg := range sbom.Artifacts.Packages.Enumerate() {

View file

@ -5,14 +5,15 @@ import (
"strings"
"testing"
"github.com/anchore/syft/internal"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/pkg"
)
func TestNpmPackageLockDirectory(t *testing.T) {
sbom, _ := catalogDirectory(t, "test-fixtures/npm-lock")
foundPackages := internal.NewStringSet()
foundPackages := strset.New()
for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations.ToSlice() {
@ -25,16 +26,16 @@ func TestNpmPackageLockDirectory(t *testing.T) {
// ensure that integration test commonTestCases stay in sync with the available catalogers
const expectedPackageCount = 6
if len(foundPackages) != expectedPackageCount {
t.Errorf("found the wrong set of npm package-lock.json packages (expected: %d, actual: %d)", expectedPackageCount, len(foundPackages))
if foundPackages.Size() != expectedPackageCount {
t.Errorf("found the wrong set of npm package-lock.json packages (expected: %d, actual: %d)", expectedPackageCount, foundPackages.Size())
}
}
func TestYarnPackageLockDirectory(t *testing.T) {
sbom, _ := catalogDirectory(t, "test-fixtures/yarn-lock")
foundPackages := internal.NewStringSet()
expectedPackages := internal.NewStringSet("async@0.9.2", "async@3.2.3", "merge-objects@1.0.5", "should-type@1.3.0", "@4lolo/resize-observer-polyfill@1.5.2")
foundPackages := strset.New()
expectedPackages := strset.New("async@0.9.2", "async@3.2.3", "merge-objects@1.0.5", "should-type@1.3.0", "@4lolo/resize-observer-polyfill@1.5.2")
for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations.ToSlice() {
@ -46,9 +47,9 @@ func TestYarnPackageLockDirectory(t *testing.T) {
}
// ensure that integration test commonTestCases stay in sync with the available catalogers
if len(foundPackages) != len(expectedPackages) {
t.Errorf("found the wrong set of yarn.lock packages (expected: %d, actual: %d)", len(expectedPackages), len(foundPackages))
if foundPackages.Size() != expectedPackages.Size() {
t.Errorf("found the wrong set of yarn.lock packages (expected: %d, actual: %d)", expectedPackages.Size(), foundPackages.Size())
} else if !reflect.DeepEqual(foundPackages, expectedPackages) {
t.Errorf("found the wrong set of yarn.lock packages (expected: %+q, actual: %+q)", expectedPackages.ToSlice(), foundPackages.ToSlice())
t.Errorf("found the wrong set of yarn.lock packages (expected: %+q, actual: %+q)", expectedPackages.List(), foundPackages.List())
}
}