fix: separate golang license caches from mod dir (#2852)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2024-06-12 19:12:35 -04:00 committed by GitHub
parent dd723bb3c5
commit ca0cc52d47
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 1496 additions and 149 deletions

View file

@ -43,6 +43,7 @@ type attestOptions struct {
options.UpdateCheck `yaml:",inline" mapstructure:",squash"`
options.Catalog `yaml:",inline" mapstructure:",squash"`
Attest options.Attest `yaml:"attest" mapstructure:"attest"`
Cache options.Cache `json:"-" yaml:"cache" mapstructure:"cache"`
}
func Attest(app clio.Application) *cobra.Command {
@ -77,6 +78,7 @@ func defaultAttestOptions() attestOptions {
Output: defaultAttestOutputOptions(),
UpdateCheck: options.DefaultUpdateCheck(),
Catalog: options.DefaultCatalog(),
Cache: options.DefaultCache(),
}
}

View file

@ -68,6 +68,7 @@ type scanOptions struct {
options.Output `yaml:",inline" mapstructure:",squash"`
options.UpdateCheck `yaml:",inline" mapstructure:",squash"`
options.Catalog `yaml:",inline" mapstructure:",squash"`
Cache options.Cache `json:"-" yaml:"cache" mapstructure:"cache"`
}
func defaultScanOptions() *scanOptions {
@ -75,6 +76,7 @@ func defaultScanOptions() *scanOptions {
Output: options.DefaultOutput(),
UpdateCheck: options.DefaultUpdateCheck(),
Catalog: options.DefaultCatalog(),
Cache: options.DefaultCache(),
}
}

View file

@ -0,0 +1,122 @@
package options
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"github.com/adrg/xdg"
"github.com/mitchellh/go-homedir"
"github.com/anchore/clio"
"github.com/anchore/syft/internal/cache"
"github.com/anchore/syft/internal/log"
)
// Cache provides configuration for the Syft caching behavior
type Cache struct {
	// Dir is the root directory for on-disk cache data; when empty, PostLoad leaves the existing (bypassed) manager in place
	Dir string `yaml:"dir" mapstructure:"dir"`
	// TTL is the cache time-to-live as a duration string such as "7d" or "24h" (see parseDuration)
	TTL string `yaml:"ttl" mapstructure:"ttl"`
}

// DescribeFields provides help text for the cache configuration fields.
func (c *Cache) DescribeFields(descriptions clio.FieldDescriptionSet) {
	descriptions.Add(&c.Dir, "root directory to cache any downloaded content")
	descriptions.Add(&c.TTL, "time to live for cached data")
}
// PostLoad installs the global cache manager based on the loaded options.
// An empty Dir is a no-op, leaving the default (bypassed) manager active.
// Configuration problems degrade gracefully: a bad TTL falls back to the
// default TTL, and an unusable directory falls back to an in-memory cache.
func (c *Cache) PostLoad() error {
	if c.Dir == "" {
		return nil
	}

	ttl, err := parseDuration(c.TTL)
	if err != nil {
		log.Warnf("unable to parse duration '%v', using default (%s) due to: %v", c.TTL, durationToString(defaultTTL()), err)
		ttl = defaultTTL()
	}

	dir, err := homedir.Expand(c.Dir)
	if err != nil {
		log.Warnf("unable to expand cache directory %s: %v", c.Dir, err)
		cache.SetManager(cache.NewInMemory(ttl))
		return nil
	}

	m, err := cache.NewFromDir(dir, ttl)
	if err != nil {
		log.Warnf("unable to get filesystem cache at %s: %v", c.Dir, err)
		cache.SetManager(cache.NewInMemory(ttl))
		return nil
	}

	cache.SetManager(m)
	return nil
}
var _ interface {
clio.PostLoader
clio.FieldDescriber
} = (*Cache)(nil)
// DefaultCache returns the default cache configuration: a stable
// per-user cache directory with the default time-to-live.
func DefaultCache() Cache {
	return Cache{
		TTL: durationToString(defaultTTL()),
		Dir: defaultDir(),
	}
}
// defaultTTL returns the default cache time-to-live: one week.
func defaultTTL() time.Duration {
	const oneWeek = 7 * 24 * time.Hour
	return oneWeek
}
// defaultDir returns a stable per-user cache root ending in "syft":
// $XDG_CACHE_HOME when set, otherwise ~/.cache, falling back to the
// system temp directory when no home directory can be determined.
func defaultDir() string {
	root := xdg.CacheHome
	if root == "" {
		home, err := homedir.Dir()
		if err != nil {
			root = os.TempDir()
			log.Debugf("unable to get stable cache directory due to: %v, defaulting cache to temp dir: %s", err, root)
		} else {
			root = filepath.Join(home, ".cache")
		}
	}
	return filepath.Join(root, "syft")
}
// durationToString renders a duration as an optional whole-day prefix
// ("<N>d") followed by Go's standard rendering of any sub-day remainder,
// e.g. 25h -> "1d1h0m0s". The zero duration renders as "0".
func durationToString(duration time.Duration) string {
	const day = 24 * time.Hour
	var sb strings.Builder
	if days := int64(duration / day); days > 0 {
		fmt.Fprintf(&sb, "%vd", days)
	}
	if remainder := duration % day; remainder != 0 {
		sb.WriteString(remainder.String())
	}
	if sb.Len() == 0 {
		return "0"
	}
	return sb.String()
}
// whitespace matches runs of whitespace, which are stripped before parsing
// so inputs like "7d 1h" are accepted.
var whitespace = regexp.MustCompile(`\s+`)

// parseDuration parses a duration string consisting of an optional
// whole-day prefix ("<N>d") followed by a standard Go duration, e.g.
// "7d", "7d1h1m1s", or "2h". Parsing is case-insensitive and embedded
// whitespace is ignored.
func parseDuration(duration string) (time.Duration, error) {
	normalized := strings.ToLower(whitespace.ReplaceAllString(duration, ""))
	dayPart, rest, hasDays := strings.Cut(normalized, "d")
	if !hasDays {
		return time.ParseDuration(normalized)
	}
	numDays, err := strconv.Atoi(dayPart)
	if err != nil {
		return 0, err
	}
	days := time.Duration(numDays) * 24 * time.Hour
	if rest == "" {
		// a bare "<N>d" (such as the default "7d") has no sub-day
		// remainder; previously this fed "" to time.ParseDuration and
		// returned a spurious error for every day-only TTL
		return days, nil
	}
	remain, err := time.ParseDuration(rest)
	return days + remain, err
}

View file

@ -0,0 +1,184 @@
package options
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/adrg/xdg"
"github.com/mitchellh/go-homedir"
"github.com/stretchr/testify/require"
)
// Test_defaultDir verifies cache-root selection: XDG_CACHE_HOME when set,
// otherwise the user home directory (under ~/.cache).
func Test_defaultDir(t *testing.T) {
	tmpDir := filepath.Join(t.TempDir(), "cache-temp")
	xdgCacheDir := filepath.Join(tmpDir, "fake-xdg-cache")
	homeDir := filepath.Join(tmpDir, "fake-home")
	tests := []struct {
		name     string
		env      map[string]string
		expected string
	}{
		{
			name: "no-xdg",
			env: map[string]string{
				"HOME": homeDir,
			},
			expected: homeDir,
		},
		{
			name: "xdg-cache",
			env: map[string]string{
				"XDG_CACHE_HOME": xdgCacheDir,
			},
			expected: xdgCacheDir,
		},
	}
	// capture all the initial environment variables to reset them before we reset library caches
	env := map[string]string{
		"HOME":            "",
		"XDG_DATA_HOME":   "",
		"XDG_DATA_DIRS":   "",
		"XDG_CONFIG_HOME": "",
		"XDG_CONFIG_DIRS": "",
		"XDG_STATE_HOME":  "",
		"XDG_CACHE_HOME":  "",
		"XDG_RUNTIME_DIR": "",
	}
	for k := range env {
		env[k] = os.Getenv(k)
	}
	// unsetEnv blanks every relevant variable via t.Setenv, which also
	// registers automatic restoration when the subtest ends
	unsetEnv := func(t *testing.T) {
		for k := range env {
			t.Setenv(k, "")
		}
	}
	// resetEnv restores the captured process environment and clears the
	// memoized state held inside the homedir and xdg libraries
	resetEnv := func() {
		for k, v := range env {
			if v == "" {
				_ = os.Unsetenv(k)
			} else {
				_ = os.Setenv(k, v)
			}
		}
		homedir.Reset()
		xdg.Reload()
	}
	t.Cleanup(resetEnv)
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			defer resetEnv()
			unsetEnv(t)
			for k, v := range test.env {
				t.Setenv(k, v)
			}
			// reload library caches so they observe the test environment
			homedir.Reset()
			xdg.Reload()
			got := defaultDir()
			// prefix check only: defaultDir appends ".cache" and/or "syft"
			require.True(t, strings.HasPrefix(got, test.expected))
		})
	}
}
// Test_parseDuration exercises day-prefixed duration parsing, including
// case-insensitivity and embedded whitespace.
// NOTE(review): when test.err is nil the error return is never asserted,
// so these cases only pin the returned value — confirm whether a
// require.NoError(t, err) is intended here.
func Test_parseDuration(t *testing.T) {
	tests := []struct {
		duration string
		expect   time.Duration
		err      require.ErrorAssertionFunc
	}{
		{
			duration: "1d",
			expect:   24 * time.Hour,
		},
		{
			duration: "7d",
			expect:   7 * 24 * time.Hour,
		},
		{
			duration: "365D",
			expect:   365 * 24 * time.Hour,
		},
		{
			duration: "7d1h1m1s",
			expect:   7*24*time.Hour + time.Hour + time.Minute + time.Second,
		},
		{
			duration: "7d 1h 1m 1s",
			expect:   7*24*time.Hour + time.Hour + time.Minute + time.Second,
		},
		{
			duration: "2h",
			expect:   2 * time.Hour,
		},
		{
			duration: "2h5m",
			expect:   2*time.Hour + 5*time.Minute,
		},
		{
			duration: "2h 5m",
			expect:   2*time.Hour + 5*time.Minute,
		},
		{
			// missing day count before the "d" marker
			duration: "d24h",
			err:      require.Error,
		},
	}
	for _, test := range tests {
		t.Run(test.duration, func(t *testing.T) {
			got, err := parseDuration(test.duration)
			if test.err != nil {
				test.err(t, err)
				return
			}
			require.Equal(t, test.expect, got)
		})
	}
}
// Test_durationToString verifies rendering of durations as a day prefix
// plus standard Go duration remainder.
// NOTE(review): the err field of the table is never used — consider removing it.
func Test_durationToString(t *testing.T) {
	tests := []struct {
		duration time.Duration
		expect   string
		err      require.ErrorAssertionFunc
	}{
		{
			expect:   "1d",
			duration: 24 * time.Hour,
		},
		{
			expect:   "7d",
			duration: 7 * 24 * time.Hour,
		},
		{
			expect:   "7d1h1m1s",
			duration: 7*24*time.Hour + time.Hour + time.Minute + time.Second,
		},
		{
			// sub-day durations render via time.Duration.String
			expect:   "2h0m0s",
			duration: 2 * time.Hour,
		},
		{
			expect:   "2h5m0s",
			duration: 2*time.Hour + 5*time.Minute,
		},
	}
	for _, test := range tests {
		t.Run(test.expect, func(t *testing.T) {
			got := durationToString(test.duration)
			require.Equal(t, test.expect, got)
		})
	}
}

6
go.mod
View file

@ -86,7 +86,10 @@ require (
require google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 // indirect
require github.com/magiconair/properties v1.8.7
require (
github.com/adrg/xdg v0.4.0
github.com/magiconair/properties v1.8.7
)
require (
dario.cat/mergo v1.0.0 // indirect
@ -98,7 +101,6 @@ require (
github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/Microsoft/hcsshim v0.11.4 // indirect
github.com/ProtonMail/go-crypto v1.0.0 // indirect
github.com/adrg/xdg v0.4.0 // indirect
github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 // indirect

51
internal/cache/README.md vendored Normal file
View file

@ -0,0 +1,51 @@
# Caching
All caches are created from a global `manager`. By default this is a `bypassedCache`, which performs no caching.
One benefit of this is that tests don't need to worry about caching causing issues unless they explicitly need
to test the cache and can opt-in using the `cache.TestCache(t)` helper.
Syft sets a `filesystemCache` when the [cache options](../../cmd/syft/internal/options/cache.go) are loaded.
When using the `filesystemCache` all items are stored on disk under a root directory, generally in the form of:
```
<rootDir>/<named-cache>/<data-version>/path/to/data
```
# Using the cache
The easiest and preferred method to use the cache is a `cache.Resolver`, which automatically creates a `<data-version>`
based on the _structure_ of the provided type.
If the structure changes in any way, it will end up with a new version key, and the cache will repopulate under this new key,
ignoring cached values stored under older, different versions.
The resolver will store items using the `json` package to serialize/deserialize values, so to save space
it is encouraged to use `omitempty`. For example:
```go
type myCacheItem struct {
Name string `json:"name,omitempty"`
}
```
It is possible to use core types such as `pkg.Package` as long as they support the standard `json` serialization,
but this is discouraged in order to decouple changes to them from affecting the information stored in the cache.
To get a cache for this type:
```go
resolver := cache.GetResolver[myCacheItem]("myCacheName", "v1")
```
Using the `resolver` is a single call, which manages checking for items in the cache, expiry times,
and if not found invoking the callback to populate the cache and return a value:
```go
data := resolver.Resolve("some/cache/key", func() (myCacheItem, error) {
// do things to return a myCacheItem or error
})
```
If it is common that checking for an item will result in errors, and you do not want to re-run the resolve function
when errors are encountered, instead of using `GetResolver`, you can use `GetResolverCachingErrors`, which is useful
for things such as resolving artifacts over a network, where a number of them will not be resolved, and you do not want
to continue to have the expense of running the network resolution. This should be used only when the risk that a network
outage results in cached errors is acceptable.
An example can be seen in the [golang cataloger](../../syft/pkg/cataloger/golang/licenses.go) fetching remote licenses.

24
internal/cache/bypass.go vendored Normal file
View file

@ -0,0 +1,24 @@
package cache
import "io"
// bypassedCache is a no-op cache and manager: every read misses and every
// write is discarded. It is the default manager, so nothing is cached
// unless caching is explicitly configured.
type bypassedCache struct{}

// Read always reports a cache miss.
func (b *bypassedCache) Read(_ string) (ReaderAtCloser, error) {
	return nil, errNotFound
}

// Write discards the contents, closing the reader when it is closeable.
func (b *bypassedCache) Write(_ string, contents io.Reader) error {
	if c, ok := contents.(io.Closer); ok {
		_ = c.Close()
	}
	return nil
}

// GetCache returns the bypassed cache itself, so every named cache is a no-op.
func (b *bypassedCache) GetCache(_, _ string) Cache {
	return b
}

// RootDirs reports no backing directories.
func (b *bypassedCache) RootDirs() []string {
	return nil
}

18
internal/cache/bypass_test.go vendored Normal file
View file

@ -0,0 +1,18 @@
package cache
import (
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// Test_bypassedCache verifies the no-op cache: writes succeed but are
// discarded, and reads always miss with errNotFound.
func Test_bypassedCache(t *testing.T) {
	m := bypassedCache{}
	cache := m.GetCache("name", "version")
	err := cache.Write("test", strings.NewReader("value"))
	require.NoError(t, err)
	rdr, err := cache.Read("test")
	require.Nil(t, rdr)
	require.ErrorIs(t, err, errNotFound)
}

48
internal/cache/cache.go vendored Normal file
View file

@ -0,0 +1,48 @@
package cache
import (
"io"
)
// Manager is responsible for managing cache data and instantiating all caches
type Manager interface {
	// GetCache returns a cache scoped to the given named, versioned data
	GetCache(name, version string) Cache

	// RootDirs returns any root directories this cache manager uses
	RootDirs() []string
}

// ReaderAtCloser is an amalgamation of: io.Reader, io.ReaderAt, and io.Closer
type ReaderAtCloser interface {
	io.Reader
	io.ReaderAt
	io.Closer
}

// Cache is what the application interacts with to get and set cached data
type Cache interface {
	// Read returns a reader for the cache value, if found and not expired
	// or errors when unable to find / expired
	Read(key string) (ReaderAtCloser, error)

	// Write writes the contents of the reader to the cache
	// and closes it, if the reader implements io.Closer
	Write(key string, contents io.Reader) error
}

// GetManager returns the global cache manager, which is used to instantiate all caches
func GetManager() Manager {
	return manager
}

// SetManager sets the global cache manager, which is used to instantiate all caches.
// Passing nil restores the default no-op (bypassed) cache.
func SetManager(m Manager) {
	if m == nil {
		manager = &bypassedCache{}
	} else {
		manager = m
	}
}

// manager is the package-global cache manager; by default nothing is cached
var manager Manager = &bypassedCache{}

21
internal/cache/cache_test.go vendored Normal file
View file

@ -0,0 +1,21 @@
package cache
import (
"testing"
"github.com/stretchr/testify/require"
)
// Test_SetManager verifies the global manager setter, including that a nil
// argument restores the default bypassed (no-op) cache.
func Test_SetManager(t *testing.T) {
	original := GetManager()
	defer SetManager(original)

	// NewInMemory is implemented by filesystemCache over an in-memory fs
	SetManager(NewInMemory(0))
	require.NotNil(t, GetManager())
	require.IsType(t, &filesystemCache{}, GetManager())

	SetManager(nil)
	require.NotNil(t, GetManager())
	require.IsType(t, &bypassedCache{}, GetManager())
}

40
internal/cache/error_resolver.go vendored Normal file
View file

@ -0,0 +1,40 @@
package cache
import "fmt"
// GetResolverCachingErrors returns a Resolver that caches errors and will return them
// instead of continuing to call the provided resolve functions
func GetResolverCachingErrors[T any](name, version string) Resolver[T] {
	return &errorResolver[T]{
		resolver: GetResolver[errResponse[T]](name, version),
	}
}

// errResponse is the cached envelope: a value plus, when resolution
// failed, the error message that was produced
type errResponse[T any] struct {
	Error string `json:"err,omitempty"`
	Value T      `json:"val,omitempty"`
}

// errorResolver wraps a Resolver so that resolver failures are cached
// alongside values and replayed until the entry expires
type errorResolver[T any] struct {
	resolver Resolver[errResponse[T]]
}
// Resolve delegates to the underlying resolver, persisting resolver errors
// as part of the cached value so failed resolutions are not retried until
// the cache entry expires.
func (r *errorResolver[T]) Resolve(key string, resolver resolverFunc[T]) (T, error) {
	v, err := r.resolver.Resolve(key, func() (errResponse[T], error) {
		// capture the error message in the envelope and report success,
		// so the (possibly failed) result is written to the cache
		value, err := resolver()
		out := errResponse[T]{
			Value: value,
		}
		if err != nil {
			out.Error = err.Error()
		}
		return out, nil
	})
	if err != nil {
		return v.Value, err
	}
	if v.Error != "" {
		// use an explicit format string: the cached message may contain
		// '%' characters, which fmt.Errorf(v.Error) would misinterpret
		return v.Value, fmt.Errorf("%s", v.Error)
	}
	return v.Value, nil
}

47
internal/cache/error_resolver_test.go vendored Normal file
View file

@ -0,0 +1,47 @@
package cache
import (
"fmt"
"testing"
"time"
"github.com/stretchr/testify/require"
)
// Test_errorResolver verifies that both successful values and errors are
// cached: the resolve functions are invoked exactly once per key, and
// subsequent calls replay the cached value or error.
func Test_errorResolver(t *testing.T) {
	original := GetManager()
	defer SetManager(original)
	SetManager(NewInMemory(time.Hour))
	resolver := GetResolverCachingErrors[string]("theCache", "theVersion")

	resolveCount := 0
	resolveFunc := func() (string, error) {
		resolveCount++
		return "theValue", nil
	}

	// first call resolves and caches
	val, err := resolver.Resolve("theKey", resolveFunc)
	require.NoError(t, err)
	require.Equal(t, 1, resolveCount)
	require.Equal(t, "theValue", val)

	// second call is served from cache; the function is not re-invoked
	val, err = resolver.Resolve("theKey", resolveFunc)
	require.NoError(t, err)
	require.Equal(t, 1, resolveCount)
	require.Equal(t, "theValue", val)

	errorCount := 0
	errorFunc := func() (string, error) {
		errorCount++
		return "", fmt.Errorf("an error")
	}

	// the error is cached too: the second call replays it without
	// invoking the function again
	_, err = resolver.Resolve("errorValue", errorFunc)
	require.ErrorContains(t, err, "an error")
	require.Equal(t, 1, errorCount)
	_, err = resolver.Resolve("errorValue", errorFunc)
	require.ErrorContains(t, err, "an error")
	require.Equal(t, 1, errorCount)
}

120
internal/cache/filesystem.go vendored Normal file
View file

@ -0,0 +1,120 @@
package cache
import (
"errors"
"fmt"
"io"
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/spf13/afero"
"github.com/anchore/syft/internal/log"
)
// NewFromDir creates a new cache manager which returns caches stored on
// disk, rooted at the given directory, expiring entries after ttl.
func NewFromDir(dir string, ttl time.Duration) (Manager, error) {
	cleaned := filepath.Clean(dir)
	base, err := subFs(afero.NewOsFs(), cleaned)
	if err != nil {
		return nil, err
	}
	return &filesystemCache{
		dir: cleaned,
		fs:  base,
		ttl: ttl,
	}, nil
}
// filePermissions is owner-only read/write/execute; cache contents are per-user
const filePermissions = 0700
const dirPermissions = os.ModeDir | filePermissions

// filesystemCache implements both Manager and Cache over a directory tree
type filesystemCache struct {
	dir string        // root path of this cache scope; "" for the in-memory variant
	fs  afero.Fs      // filesystem rooted at dir
	ttl time.Duration // entries older than this (by mtime) are treated as expired
}
// GetCache returns a cache scoped under this manager's root for the given
// name and version, degrading to a no-op cache when the directory cannot
// be prepared.
func (d *filesystemCache) GetCache(name, version string) Cache {
	scoped, err := subFs(d.fs, name, version)
	if err != nil {
		log.Warnf("error getting cache for: %s/%s: %v", name, version, err)
		return &bypassedCache{}
	}
	return &filesystemCache{
		dir: filepath.Join(d.dir, name, version),
		fs:  scoped,
		ttl: d.ttl,
	}
}
// RootDirs returns the single on-disk root this cache uses, or nil for the
// in-memory variant, which has no directory.
func (d *filesystemCache) RootDirs() []string {
	if d.dir != "" {
		return []string{d.dir}
	}
	return nil
}
// Read returns a reader for the cached entry at key, or errNotFound /
// errExpired when the entry is missing or older than the TTL.
// The caller is responsible for closing the returned reader.
func (d *filesystemCache) Read(key string) (ReaderAtCloser, error) {
	path := makeDiskKey(key)
	f, err := d.fs.Open(path)
	if err != nil {
		log.Tracef("no cache entry for %s %s: %v", d.dir, key, err)
		return nil, errNotFound
	}
	stat, err := f.Stat()
	if err != nil || stat == nil || time.Since(stat.ModTime()) > d.ttl {
		// close the handle here: the file was opened but is not returned,
		// so failing to close it would leak a file descriptor
		_ = f.Close()
		log.Tracef("cache entry is too old for %s %s", d.dir, key)
		return nil, errExpired
	}
	log.Tracef("using cache for %s %s", d.dir, key)
	return f, nil
}
// Write stores the reader's contents at the disk path derived from key.
// Expiry is enforced at read time via file mtime, not at write time.
func (d *filesystemCache) Write(key string, contents io.Reader) error {
	path := makeDiskKey(key)
	return afero.WriteReader(d.fs, path, contents)
}
// subFs returns a writable directory with the given name under the root cache directory returned from findRoot,
// the directory will be created if it does not exist
// NOTE(review): the MkdirAll error message prints both the joined dir and
// the re-joined subDirs, duplicating the path — confirm the intended wording.
func subFs(fsys afero.Fs, subDirs ...string) (afero.Fs, error) {
	dir := filepath.Join(subDirs...)
	dir = filepath.Clean(dir)
	stat, err := fsys.Stat(dir)
	if errors.Is(err, afero.ErrFileNotFound) {
		// create the directory tree on first use, then re-stat to verify
		err = fsys.MkdirAll(dir, dirPermissions)
		if err != nil {
			return nil, fmt.Errorf("unable to create directory at '%s/%s': %v", dir, strings.Join(subDirs, "/"), err)
		}
		stat, err = fsys.Stat(dir)
		if err != nil {
			return nil, err
		}
	}
	// any remaining error, or a non-directory at the path, is fatal
	if err != nil || stat == nil || !stat.IsDir() {
		return nil, fmt.Errorf("unable to verify directory '%s': %v", dir, err)
	}
	// scope all subsequent operations to the subdirectory
	fsys = afero.NewBasePathFs(fsys, dir)
	return fsys, err
}
// keyReplacer matches every character that is not allowed verbatim in an
// on-disk key segment
var keyReplacer = regexp.MustCompile("[^-._/a-zA-Z0-9]")

// makeDiskKey converts a cache key to a filesystem-safe relative path,
// deliberately leaving forward slashes intact so keys can logically
// partition into subdirectories on disk.
func makeDiskKey(key string) string {
	// a lone "." would collide with the current-directory name, so encode it
	if key == "." {
		return "%2E"
	}
	// percent-encode every character outside the allowed set
	escaped := keyReplacer.ReplaceAllStringFunc(key, url.QueryEscape)
	// ".." segments could traverse out of the cache root; encode both dots
	return strings.ReplaceAll(escaped, "..", "%2E%2E")
}

// sentinel errors returned by Cache.Read implementations
var errNotFound = fmt.Errorf("not found")
var errExpired = fmt.Errorf("expired")

94
internal/cache/filesystem_test.go vendored Normal file
View file

@ -0,0 +1,94 @@
package cache
import (
"io"
"net/url"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal"
)
// Test_filesystemCache round-trips a value through a disk-backed cache and
// verifies the on-disk layout <root>/<name>/<version>/<key>.
func Test_filesystemCache(t *testing.T) {
	dir := t.TempDir()
	man, err := NewFromDir(dir, 1*time.Minute)
	require.NoError(t, err)

	cacheName := "test"
	cacheVersion := "v1"
	cache := man.GetCache(cacheName, cacheVersion)

	cacheKey := "test-key"
	contentsValue := "some contents to cache"

	err = cache.Write(cacheKey, strings.NewReader(contentsValue))
	require.NoError(t, err)

	rdr, err := cache.Read(cacheKey)
	require.NoError(t, err)
	defer internal.CloseAndLogError(rdr, cacheKey)

	contents, err := io.ReadAll(rdr)
	require.NoError(t, err)
	require.Equal(t, contentsValue, string(contents))

	// check the contents were actually written to disk as expected
	contents, err = os.ReadFile(filepath.Join(dir, cacheName, cacheVersion, cacheKey))
	require.NoError(t, err)
	require.Equal(t, contentsValue, string(contents))

	// an unknown key reports a miss
	_, err = cache.Read("otherKey")
	require.ErrorIs(t, err, errNotFound)
}
// Test_makeDiskKey verifies filesystem-safe key encoding, including that
// the encoding is reversible via url.QueryUnescape.
func Test_makeDiskKey(t *testing.T) {
	tests := []struct {
		in       string
		expected string
	}{
		{
			in:       "",
			expected: "",
		},
		{
			in:       ".",
			expected: "%2E",
		},
		{
			in:       "..",
			expected: "%2E%2E",
		},
		{
			in:       "github.com",
			expected: "github.com",
		},
		{
			in:       "../github.com",
			expected: "%2E%2E/github.com",
		},
		{
			in:       "github.com/../..",
			expected: "github.com/%2E%2E/%2E%2E",
		},
		{
			// a pre-existing %2E must itself be escaped (%252E) so
			// unescaping still round-trips to the original input
			in:       "github.com/%2E../..",
			expected: "github.com/%252E%2E%2E/%2E%2E",
		},
	}
	for _, test := range tests {
		t.Run(test.in, func(t *testing.T) {
			got := makeDiskKey(test.in)
			// validate appropriate escaping
			require.Equal(t, test.expected, got)
			// also validate that unescaped string matches original
			unescaped, err := url.QueryUnescape(got)
			require.NoError(t, err)
			require.Equal(t, test.in, unescaped)
		})
	}
}

71
internal/cache/hash_type.go vendored Normal file
View file

@ -0,0 +1,71 @@
package cache
import (
"fmt"
"reflect"
"github.com/mitchellh/hashstructure/v2"
)
// hashType returns a stable hash based on the structure of the type
func hashType[T any]() string {
	// get the base type and hash an empty instance
	var t T
	empty := emptyValue(reflect.TypeOf(t)).Interface()
	// all options disabled: the hash should depend only on the type's
	// structure, never on populated values
	hash, err := hashstructure.Hash(empty, hashstructure.FormatV2, &hashstructure.HashOptions{
		ZeroNil:         false,
		IgnoreZeroValue: false,
		SlicesAsSets:    false,
		UseStringer:     false,
	})
	if err != nil {
		// an unhashable type is a programming error, not a runtime condition
		panic(fmt.Errorf("unable to use type as cache key: %w", err))
	}
	return fmt.Sprintf("%x", hash)
}
// emptyValue constructs a representative empty value for t, descending one
// level into pointers and slices so nested structure contributes to the
// hash instead of collapsing to nil.
func emptyValue(t reflect.Type) reflect.Value {
	switch t.Kind() {
	case reflect.Pointer:
		e := t.Elem()
		v := emptyValue(e)
		if v.CanAddr() {
			return v.Addr()
		}
		// non-addressable value: allocate a pointer and copy the value in
		ptrv := reflect.New(e)
		ptrv.Elem().Set(v)
		return ptrv
	case reflect.Slice:
		// a single-element slice, so the element structure is hashed
		v := emptyValue(t.Elem())
		s := reflect.MakeSlice(t, 1, 1)
		s.Index(0).Set(v)
		return s
	case reflect.Struct:
		v := reflect.New(t).Elem()
		// get all empty field values, too
		for i := 0; i < v.NumField(); i++ {
			f := t.Field(i)
			if isIgnored(f) {
				continue
			}
			fv := v.Field(i)
			if fv.CanSet() {
				fv.Set(emptyValue(f.Type))
			}
		}
		return v
	default:
		// scalars and everything else: the plain zero value
		return reflect.New(t).Elem()
	}
}

// isIgnored reports whether a struct field is excluded from hashing:
// unexported fields and fields tagged `hash:"-"` or `hash:"ignore"`
func isIgnored(f reflect.StructField) bool {
	if !f.IsExported() {
		return true
	}
	tag := f.Tag.Get("hash")
	if tag == "-" || tag == "ignore" {
		return true
	}
	return false
}

123
internal/cache/hash_type_test.go vendored Normal file
View file

@ -0,0 +1,123 @@
package cache
import (
"fmt"
"testing"
"github.com/mitchellh/hashstructure/v2"
"github.com/stretchr/testify/require"
)
// Test_hashType pins structural hashes for a variety of type shapes.
// Identically-shaped types hash identically (t1 vs *t1), while distinct
// types or container nesting produce distinct hashes.
func Test_hashType(t *testing.T) {
	type t1 struct {
		Name string
	}
	type t2 struct {
		Name string
	}
	type generic[T any] struct {
		Val T
	}
	tests := []struct {
		name     string
		hash     func() string
		expected string
	}{
		{
			name:     "struct 1",
			hash:     func() string { return hashType[t1]() },
			expected: "d106c3ffbf98a0b1",
		},
		{
			name:     "slice of struct 1",
			hash:     func() string { return hashType[[]t1]() },
			expected: "8122ace4ee1af0b4",
		},
		{
			name:     "slice of struct 2",
			hash:     func() string { return hashType[[]t2]() },
			expected: "8cc04b5808be5bf9",
		},
		{
			name:     "ptr 1",
			hash:     func() string { return hashType[*t1]() },
			expected: "d106c3ffbf98a0b1", // same hash as t1, which is ok since the structs are the same
		},
		{
			name:     "slice of ptr 1",
			hash:     func() string { return hashType[[]*t1]() },
			expected: "8122ace4ee1af0b4", // same hash as []t1, again underlying serialization is the same
		},
		{
			name:     "slice of ptr 2",
			hash:     func() string { return hashType[[]*t2]() },
			expected: "8cc04b5808be5bf9", // same hash as []t2, underlying serialization is the same
		},
		{
			name:     "slice of ptr of slice of ptr",
			hash:     func() string { return hashType[[]*[]*t1]() },
			expected: "500d9f5b3a5977ce",
		},
		{
			name:     "generic 1",
			hash:     func() string { return hashType[generic[t1]]() },
			expected: "b5fbb30e24400e81",
		},
		{
			name:     "generic 2",
			hash:     func() string { return hashType[generic[t2]]() },
			expected: "becdb767c6b22bfa",
		},
		{
			name:     "generic with ptr 1",
			hash:     func() string { return hashType[generic[*t1]]() },
			expected: "30c8855bf290fd83",
		},
		{
			name:     "generic with ptr 2",
			hash:     func() string { return hashType[generic[*t2]]() },
			expected: "b66366b6ce9e6361",
		},
		{
			name:     "generic with slice 1",
			hash:     func() string { return hashType[generic[[]t1]]() },
			expected: "d2ed158942fa6c29",
		},
		{
			name:     "generic with slice 2",
			hash:     func() string { return hashType[generic[[]t2]]() },
			expected: "7a7bec575871c179",
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			require.Equal(t, test.expected, test.hash())
		})
	}
}
// Test_hashIgnores verifies that unexported fields and fields tagged
// hash:"ignore" or hash:"-" do not influence the structural hash: a
// populated ignored field hashes the same as the empty instance.
func Test_hashIgnores(t *testing.T) {
	hash := func(v any) string {
		v, err := hashstructure.Hash(v, hashstructure.FormatV2, &hashstructure.HashOptions{})
		require.NoError(t, err)
		return fmt.Sprintf("%x", v)
	}
	type t1 struct {
		Name        string
		notExported string
	}
	require.Equal(t, hash(t1{notExported: "a value"}), hashType[t1]())
	type t2 struct {
		Name     string
		Exported string `hash:"ignore"`
	}
	require.Equal(t, hash(t2{Exported: "another value"}), hashType[t2]())
	type t3 struct {
		Name     string
		Exported string `hash:"-"`
	}
	require.Equal(t, hash(t3{Exported: "still valued"}), hashType[t3]())
}

16
internal/cache/memory.go vendored Normal file
View file

@ -0,0 +1,16 @@
package cache
import (
"time"
"github.com/spf13/afero"
)
// NewInMemory returns an in-memory only cache manager; the empty dir marks
// it as having no on-disk roots (see filesystemCache.RootDirs)
func NewInMemory(ttl time.Duration) Manager {
	return &filesystemCache{
		dir: "",
		fs:  afero.NewMemMapFs(),
		ttl: ttl,
	}
}

37
internal/cache/memory_test.go vendored Normal file
View file

@ -0,0 +1,37 @@
package cache
import (
"io"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal"
)
// Test_NewInMemory round-trips a value through the in-memory cache manager
// and verifies misses for unknown keys.
func Test_NewInMemory(t *testing.T) {
	man := NewInMemory(time.Hour)

	cacheName := "test"
	cacheVersion := "v1"
	cache := man.GetCache(cacheName, cacheVersion)

	cacheKey := "test-key"
	contentsValue := "some contents to cache"

	err := cache.Write(cacheKey, strings.NewReader(contentsValue))
	require.NoError(t, err)

	rdr, err := cache.Read(cacheKey)
	require.NoError(t, err)
	defer internal.CloseAndLogError(rdr, cacheKey)

	contents, err := io.ReadAll(rdr)
	require.NoError(t, err)
	require.Equal(t, contentsValue, string(contents))

	_, err = cache.Read("otherKey")
	require.ErrorIs(t, err, errNotFound)
}

83
internal/cache/resolver.go vendored Normal file
View file

@ -0,0 +1,83 @@
package cache
import (
"bytes"
"encoding/json"
"fmt"
"path"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
)
// Resolver interface provides a single Resolve method, which will return from cache
// or call the provided resolve function to get the value if not available in cache
type Resolver[T any] interface {
	// Resolve attempts to resolve the given key from cache and convert it to the type of the cache,
	// or calls the resolver function if unable to resolve a cached value
	Resolve(key string, resolver resolverFunc[T]) (T, error)
}

// GetResolver returns a cache resolver for persistent cached data across Syft runs, stored in a unique
// location based on the provided name and versioned by the type
func GetResolver[T any](name, version string) Resolver[T] {
	// the version key embeds a structural hash of T, so any change to the
	// type's shape automatically invalidates entries cached by older code
	typeHash := hashType[T]()
	versionKey := path.Join(version, typeHash)
	return &cacheResolver[T]{
		name:  fmt.Sprintf("%s/%s", name, versionKey),
		cache: manager.GetCache(name, versionKey),
	}
}

// resolverKeySuffix is appended to every key so entries land on disk as .json files
const resolverKeySuffix = ".json"

// resolverFunc produces a value when the cache cannot
type resolverFunc[T any] func() (T, error)

// cacheResolver implements Resolver on top of a Cache using JSON serialization
type cacheResolver[T any] struct {
	name  string // human-readable identifier, used in log messages
	cache Cache
}

var _ interface {
	Resolver[int]
} = (*cacheResolver[int])(nil)
// Resolve returns the cached value for key when present and decodable,
// otherwise invokes the resolver and caches its result. A corrupt or
// incompatible cache entry is not fatal — it falls through to re-resolving.
func (r *cacheResolver[T]) Resolve(key string, resolver resolverFunc[T]) (T, error) {
	key += resolverKeySuffix
	rdr, err := r.cache.Read(key)
	if rdr == nil || err != nil {
		return r.resolveAndCache(key, resolver)
	}
	defer internal.CloseAndLogError(rdr, key)

	// json.NewDecoder never returns nil, so decode directly; the previous
	// nil-check branch was unreachable and logged a guaranteed-nil error
	var t T
	if err = json.NewDecoder(rdr).Decode(&t); err != nil {
		log.Tracef("error decoding cached entry for %s %v: %v", r.name, key, err)
		return r.resolveAndCache(key, resolver)
	}
	// no error, able to resolve from cache
	return t, nil
}
// resolveAndCache invokes the resolver and, on success, stores the
// JSON-encoded result in the cache under key before returning it.
func (r *cacheResolver[T]) resolveAndCache(key string, resolver func() (T, error)) (T, error) {
	value, err := resolver()
	if err != nil {
		return value, err
	}
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetEscapeHTML(false)
	if err = enc.Encode(value); err != nil {
		return value, err
	}
	return value, r.cache.Write(key, &buf)
}

92
internal/cache/resolver_test.go vendored Normal file
View file

@ -0,0 +1,92 @@
package cache
import (
"encoding/json"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/require"
)
// Test_Resolver verifies the JSON-backed cache resolver: errors are NOT
// cached (the plain resolver re-runs on every error), successful values
// are resolved once and then served from cache, and entries are stored as
// JSON under the versioned key.
func Test_Resolver(t *testing.T) {
	original := GetManager()
	defer SetManager(original)
	SetManager(NewInMemory(time.Hour))

	type sub struct {
		Name  string
		Value bool
	}
	type thing struct {
		Value  string
		Values []int
		Subs   []*sub
	}

	// the resolver's cache is versioned by the structural hash of `thing`
	versionHash := hashType[thing]()
	cache := GetManager().GetCache("test", "v7/"+versionHash)
	resolver := GetResolver[thing]("test", "v7")
	require.NotNil(t, resolver)
	require.IsType(t, &cacheResolver[thing]{}, resolver)
	cr := resolver.(*cacheResolver[thing])
	require.IsType(t, cache, cr.cache)

	// errors are not cached: the function is invoked again on each call
	resolveErrCount := 0
	resolveThingErr := func() (thing, error) {
		resolveErrCount++
		return thing{}, fmt.Errorf("an error")
	}
	_, err := resolver.Resolve("err", resolveThingErr)
	require.ErrorContains(t, err, "an error")
	require.Equal(t, 1, resolveErrCount)
	_, err = resolver.Resolve("err", resolveThingErr)
	require.ErrorContains(t, err, "an error")
	require.Equal(t, 2, resolveErrCount)

	aThing := thing{
		Value:  "a value",
		Values: []int{7, 8, 9},
		Subs: []*sub{
			{
				Name:  "sub1",
				Value: true,
			},
			{
				Name:  "sub2",
				Value: false,
			},
		},
	}

	// successful values are resolved once, then served from cache
	resolveThingCount := 0
	resolveThing := func() (thing, error) {
		resolveThingCount++
		return aThing, nil
	}
	val, err := resolver.Resolve("thing", resolveThing)
	require.NoError(t, err)
	require.Equal(t, 1, resolveThingCount)
	require.Equal(t, aThing, val)
	val, err = resolver.Resolve("thing", resolveThing)
	require.NoError(t, err)
	require.Equal(t, 1, resolveThingCount)
	require.Equal(t, aThing, val)

	// the raw cache entry is JSON, stored under the ".json" suffixed key
	rdr, err := cache.Read("thing" + resolverKeySuffix)
	require.NoError(t, err)
	decoder := json.NewDecoder(rdr)
	var val2 thing
	err = decoder.Decode(&val2)
	require.NoError(t, err)
	require.Equal(t, aThing, val2)
}

View file

@ -20,12 +20,8 @@ const (
// NewGoModuleFileCataloger returns a new cataloger object that searches within go.mod files.
func NewGoModuleFileCataloger(opts CatalogerConfig) pkg.Cataloger {
c := goModCataloger{
licenses: newGoLicenses(modFileCatalogerName, opts),
}
return generic.NewCataloger(modFileCatalogerName).
WithParserByGlobs(c.parseGoModFile, "**/go.mod")
WithParserByGlobs(newGoModCataloger(opts).parseGoModFile, "**/go.mod")
}
// NewGoModuleBinaryCataloger returns a new cataloger object that searches within binaries built by the go compiler.

View file

@ -41,6 +41,7 @@ type MainModuleVersionConfig struct {
func DefaultCatalogerConfig() CatalogerConfig {
g := CatalogerConfig{
MainModuleVersion: DefaultMainModuleVersionConfig(),
LocalModCacheDir: defaultGoModDir(),
}
// first process the proxy settings
@ -67,24 +68,25 @@ func DefaultCatalogerConfig() CatalogerConfig {
}
}
if g.LocalModCacheDir == "" {
goPath := os.Getenv("GOPATH")
if goPath == "" {
homeDir, err := homedir.Dir()
if err != nil {
log.Debug("unable to determine user home dir: %v", err)
} else {
goPath = filepath.Join(homeDir, "go")
}
}
if goPath != "" {
g.LocalModCacheDir = filepath.Join(goPath, "pkg", "mod")
}
}
return g
}
// defaultGoModDir returns $GOPATH/pkg/mod or $HOME/go/pkg/mod based on environment variables available
func defaultGoModDir() string {
	goPath := os.Getenv("GOPATH")
	if goPath == "" {
		homeDir, err := homedir.Dir()
		if err != nil {
			// %v, not %w: error wrapping only applies to fmt.Errorf, and a
			// %w passed to a logger renders as "%!w(...)" garbage
			log.Warnf("unable to determine GOPATH or user home dir: %v", err)
			return ""
		}
		goPath = filepath.Join(homeDir, "go")
	}
	return filepath.Join(goPath, "pkg", "mod")
}
func DefaultMainModuleVersionConfig() MainModuleVersionConfig {
return MainModuleVersionConfig{
FromLDFlags: true,

View file

@ -1,6 +1,7 @@
package golang
import (
"path/filepath"
"testing"
"github.com/mitchellh/go-homedir"
@ -47,7 +48,7 @@ func Test_Config(t *testing.T) {
opts: opts{},
expected: CatalogerConfig{
SearchLocalModCacheLicenses: false,
LocalModCacheDir: "/go/pkg/mod",
LocalModCacheDir: filepath.Join("/go", "pkg", "mod"),
SearchRemoteLicenses: false,
Proxies: []string{"https://my.proxy"},
NoProxy: []string{"my.private", "no.proxy"},

View file

@ -21,25 +21,41 @@ import (
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/cache"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
)
type goLicenses struct {
type goLicense struct {
Value string `json:"val,omitempty"`
SPDXExpression string `json:"spdx,omitempty"`
Type license.Type `json:"type,omitempty"`
URLs []string `json:"urls,omitempty"`
Locations []string `json:"locations,omitempty"`
}
type goLicenseResolver struct {
catalogerName string
opts CatalogerConfig
localModCacheResolver file.WritableResolver
localModCacheDir fs.FS
licenseCache cache.Resolver[[]goLicense]
lowerLicenseFileNames *strset.Set
}
func newGoLicenses(catalogerName string, opts CatalogerConfig) goLicenses {
return goLicenses{
func newGoLicenseResolver(catalogerName string, opts CatalogerConfig) goLicenseResolver {
var localModCacheDir fs.FS
if opts.SearchLocalModCacheLicenses {
localModCacheDir = os.DirFS(opts.LocalModCacheDir)
}
return goLicenseResolver{
catalogerName: catalogerName,
opts: opts,
localModCacheResolver: modCacheResolver(opts.LocalModCacheDir),
localModCacheDir: localModCacheDir,
licenseCache: cache.GetResolverCachingErrors[[]goLicense]("golang", "v1"),
lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...),
}
}
@ -63,98 +79,95 @@ func remotesForModule(proxies []string, noProxy []string, module string) []strin
return proxies
}
func modCacheResolver(modCacheDir string) file.WritableResolver {
var r file.WritableResolver
func (c *goLicenseResolver) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) ([]pkg.License, error) {
// search the scan target first, ignoring local and remote sources
goLicenses, err := c.findLicensesInSource(resolver,
fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
)
if err != nil || len(goLicenses) > 0 {
return toPkgLicenses(goLicenses), err
}
if modCacheDir == "" {
log.Trace("unable to determine mod cache directory, skipping mod cache resolver")
r = fileresolver.Empty{}
} else {
stat, err := os.Stat(modCacheDir)
if os.IsNotExist(err) || stat == nil || !stat.IsDir() {
log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir)
r = fileresolver.Empty{}
} else {
r = fileresolver.NewFromUnindexedDirectory(modCacheDir)
// look in the local host mod directory...
if c.opts.SearchLocalModCacheLicenses {
goLicenses, err = c.getLicensesFromLocal(moduleName, moduleVersion)
if err != nil || len(goLicenses) > 0 {
return toPkgLicenses(goLicenses), err
}
}
return r
}
func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) {
licenses, err = c.findLicenses(resolver,
fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
)
if err != nil || len(licenses) > 0 {
return requireCollection(licenses), err
// download from remote sources
if c.opts.SearchRemoteLicenses {
goLicenses, err = c.getLicensesFromRemote(moduleName, moduleVersion)
}
// look in the local host mod cache...
licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion)
if err != nil || len(licenses) > 0 {
return requireCollection(licenses), err
}
// we did not find it yet and remote searching was enabled
licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion)
return requireCollection(licenses), err
return toPkgLicenses(goLicenses), err
}
func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) {
if !c.opts.SearchLocalModCacheLicenses {
func (c *goLicenseResolver) getLicensesFromLocal(moduleName, moduleVersion string) ([]goLicense, error) {
if c.localModCacheDir == nil {
return nil, nil
}
// if we're running against a directory on the filesystem, it may not include the
// user's homedir / GOPATH, so we defer to using the localModCacheResolver
return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
}
subdir := moduleDir(moduleName, moduleVersion)
func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) {
if !c.opts.SearchRemoteLicenses {
return nil, nil
}
proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName)
fsys, err := getModule(proxies, moduleName, moduleVersion)
// get the local subdirectory containing the specific go module
dir, err := fs.Sub(c.localModCacheDir, subdir)
if err != nil {
return nil, err
}
dir := moduleDir(moduleName, moduleVersion)
// populate the mod cache with the results
err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error {
if err != nil {
log.Debug(err)
return nil
}
if d.IsDir() {
return nil
}
f, err := fsys.Open(filePath)
if err != nil {
return err
}
return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f)
})
if err != nil {
log.Tracef("remote proxy walk failed for: %s", moduleName)
}
return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
// if we're running against a directory on the filesystem, it may not include the
// user's homedir / GOPATH, so we defer to using the localModCacheResolver
// we use $GOPATH/pkg/mod to avoid leaking information about the user's system
return c.findLicensesInFS("file://$GOPATH/pkg/mod/"+subdir+"/", dir)
}
func (c *goLicenses) findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) {
out = make([]pkg.License, 0)
if resolver == nil {
return
}
func (c *goLicenseResolver) getLicensesFromRemote(moduleName, moduleVersion string) ([]goLicense, error) {
return c.licenseCache.Resolve(fmt.Sprintf("%s/%s", moduleName, moduleVersion), func() ([]goLicense, error) {
proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName)
urlPrefix, fsys, err := getModule(proxies, moduleName, moduleVersion)
if err != nil {
return nil, err
}
return c.findLicensesInFS(urlPrefix, fsys)
})
}
func (c *goLicenseResolver) findLicensesInFS(urlPrefix string, fsys fs.FS) ([]goLicense, error) {
var out []goLicense
err := fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, _ error) error {
if !c.lowerLicenseFileNames.Has(strings.ToLower(d.Name())) {
return nil
}
rdr, err := fsys.Open(filePath)
if err != nil {
log.Debugf("error opening license file %s: %v", filePath, err)
return nil
}
defer internal.CloseAndLogError(rdr, filePath)
parsed, err := licenses.Parse(rdr, file.NewLocation(filePath))
if err != nil {
log.Debugf("error parsing license file %s: %v", filePath, err)
return nil
}
// since these licenses are found in an external fs.FS, not in the scanned source,
// get rid of the locations but keep information about the where the license was found
// by prepending the urlPrefix to the internal path for an accurate representation
for _, l := range toGoLicenses(parsed) {
l.URLs = []string{urlPrefix + filePath}
l.Locations = nil
out = append(out, l)
}
return nil
})
return out, err
}
func (c *goLicenseResolver) findLicensesInSource(resolver file.Resolver, globMatch string) ([]goLicense, error) {
var out []goLicense
locations, err := resolver.FilesByGlob(globMatch)
if err != nil {
return nil, err
@ -168,11 +181,19 @@ func (c *goLicenses) findLicenses(resolver file.Resolver, globMatch string) (out
out = append(out, parsed...)
}
return
// if we have a directory but simply don't have any found license files, indicate this so we
// don't re-download modules continually
if len(locations) > 0 && len(out) == 0 {
return nil, noLicensesFound{
glob: globMatch,
}
}
return out, nil
}
func (c *goLicenses) parseLicenseFromLocation(l file.Location, resolver file.Resolver) ([]pkg.License, error) {
var out []pkg.License
func (c *goLicenseResolver) parseLicenseFromLocation(l file.Location, resolver file.Resolver) ([]goLicense, error) {
var out []goLicense
fileName := path.Base(l.RealPath)
if c.lowerLicenseFileNames.Has(strings.ToLower(fileName)) {
contents, err := resolver.FileContentsByLocation(l)
@ -185,7 +206,7 @@ func (c *goLicenses) parseLicenseFromLocation(l file.Location, resolver file.Res
return nil, err
}
out = append(out, parsed...)
out = append(out, toGoLicenses(parsed)...)
}
return out, nil
}
@ -194,13 +215,9 @@ func moduleDir(moduleName, moduleVersion string) string {
return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion)
}
func moduleSearchGlob(moduleName, moduleVersion string) string {
return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion))
}
func requireCollection(licenses []pkg.License) []pkg.License {
func requireCollection[T any](licenses []T) []T {
if licenses == nil {
return make([]pkg.License, 0)
return make([]T, 0)
}
return licenses
}
@ -213,18 +230,19 @@ func processCaps(s string) string {
})
}
func getModule(proxies []string, moduleName, moduleVersion string) (fsys fs.FS, err error) {
func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix string, fsys fs.FS, err error) {
for _, proxy := range proxies {
u, _ := url.Parse(proxy)
if proxy == "direct" {
fsys, err = getModuleRepository(moduleName, moduleVersion)
urlPrefix, fsys, err = getModuleRepository(moduleName, moduleVersion)
continue
}
switch u.Scheme {
case "https", "http":
fsys, err = getModuleProxy(proxy, moduleName, moduleVersion)
urlPrefix, fsys, err = getModuleProxy(proxy, moduleName, moduleVersion)
case "file":
p := filepath.Join(u.Path, moduleName, "@v", moduleVersion)
urlPrefix = path.Join("file://", p) + "/"
fsys = os.DirFS(p)
}
if fsys != nil {
@ -234,13 +252,13 @@ func getModule(proxies []string, moduleName, moduleVersion string) (fsys fs.FS,
return
}
func getModuleProxy(proxy string, moduleName string, moduleVersion string) (out fs.FS, _ error) {
func getModuleProxy(proxy string, moduleName string, moduleVersion string) (moduleURL string, out fs.FS, _ error) {
u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion)
// get the module zip
resp, err := http.Get(u) //nolint:gosec
if err != nil {
return nil, err
return "", nil, err
}
defer func() { _ = resp.Body.Close() }()
@ -250,25 +268,25 @@ func getModuleProxy(proxy string, moduleName string, moduleVersion string) (out
// try lowercasing it; some packages have mixed casing that really messes up the proxy
resp, err = http.Get(u) //nolint:gosec
if err != nil {
return nil, err
return "", nil, err
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
return "", nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
}
}
// read the zip
b, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
return "", nil, err
}
out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength)
versionPath := findVersionPath(out, ".")
out = getSubFS(out, versionPath)
return out, err
return u + "#" + versionPath + "/", out, err
}
func findVersionPath(f fs.FS, dir string) string {
@ -288,26 +306,111 @@ func findVersionPath(f fs.FS, dir string) string {
return ""
}
func getModuleRepository(moduleName string, moduleVersion string) (fs.FS, error) {
func getModuleRepository(moduleName string, moduleVersion string) (string, fs.FS, error) {
repoName := moduleName
parts := strings.Split(moduleName, "/")
if len(parts) > 2 {
repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2])
}
// see if there's a hash and use that if so, otherwise use a tag
splitVersion := strings.Split(moduleVersion, "-")
var cloneRefName plumbing.ReferenceName
refPath := ""
if len(splitVersion) < 3 {
tagName := splitVersion[0]
cloneRefName = plumbing.NewTagReferenceName(tagName)
refPath = "/tags/" + tagName
}
f := memfs.New()
buf := &bytes.Buffer{}
_, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{
URL: fmt.Sprintf("https://%s", repoName),
ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA
repoURL := fmt.Sprintf("https://%s", repoName)
r, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{
URL: repoURL,
ReferenceName: cloneRefName,
SingleBranch: true,
Depth: 1,
Progress: buf,
})
if err != nil {
return nil, fmt.Errorf("%w -- %s", err, buf.String())
return "", nil, fmt.Errorf("%w -- %s", err, buf.String())
}
return billyFSAdapter{fs: f}, nil
if len(splitVersion) > 2 {
sha := splitVersion[len(splitVersion)-1]
hash, err := r.ResolveRevision(plumbing.Revision(sha))
if err != nil || hash == nil {
log.Tracef("unable to resolve hash %s: %v", sha, err)
} else {
w, err := r.Worktree()
if err != nil {
log.Tracef("unable to get worktree, using default: %v", err)
}
err = w.Checkout(&git.CheckoutOptions{
Hash: *hash,
})
if err != nil {
log.Tracef("unable to checkout commit, using default: %v", err)
} else {
refPath = "/refs/" + hash.String()
}
}
}
return repoURL + refPath + "/", billyFSAdapter{fs: f}, err
}
type noLicensesFound struct {
glob string
}
func (l noLicensesFound) Error() string {
return fmt.Sprintf("unable to find license information matching: %s", l.glob)
}
var _ error = (*noLicensesFound)(nil)
func toPkgLicenses(goLicenses []goLicense) []pkg.License {
var out []pkg.License
for _, l := range goLicenses {
out = append(out, pkg.License{
Value: l.Value,
SPDXExpression: l.SPDXExpression,
Type: l.Type,
URLs: l.URLs,
Locations: toPkgLocations(l.Locations),
})
}
return requireCollection(out)
}
func toPkgLocations(goLocations []string) file.LocationSet {
out := file.NewLocationSet()
for _, l := range goLocations {
out.Add(file.NewLocation(l))
}
return out
}
func toGoLicenses(pkgLicenses []pkg.License) []goLicense {
var out []goLicense
for _, l := range pkgLicenses {
out = append(out, goLicense{
Value: l.Value,
SPDXExpression: l.SPDXExpression,
Type: l.Type,
URLs: l.URLs,
Locations: toGoLocations(l.Locations),
})
}
return out
}
func toGoLocations(locations file.LocationSet) []string {
var out []string
for _, l := range locations.ToSlice() {
out = append(out, l.RealPath)
}
return out
}

View file

@ -8,6 +8,7 @@ import (
"net/http/httptest"
"os"
"path"
"path/filepath"
"strings"
"testing"
@ -36,7 +37,8 @@ func Test_LocalLicenseSearch(t *testing.T) {
Value: "Apache-2.0",
SPDXExpression: "Apache-2.0",
Type: license.Concluded,
Locations: file.NewLocationSet(loc1),
URLs: []string{"file://$GOPATH/pkg/mod/" + loc1.RealPath},
Locations: file.NewLocationSet(),
},
},
{
@ -46,7 +48,8 @@ func Test_LocalLicenseSearch(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Concluded,
Locations: file.NewLocationSet(loc2),
URLs: []string{"file://$GOPATH/pkg/mod/" + loc2.RealPath},
Locations: file.NewLocationSet(),
},
},
{
@ -56,7 +59,8 @@ func Test_LocalLicenseSearch(t *testing.T) {
Value: "Apache-2.0",
SPDXExpression: "Apache-2.0",
Type: license.Concluded,
Locations: file.NewLocationSet(loc3),
URLs: []string{"file://$GOPATH/pkg/mod/" + loc3.RealPath},
Locations: file.NewLocationSet(),
},
},
}
@ -66,11 +70,11 @@ func Test_LocalLicenseSearch(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
l := newGoLicenses(
l := newGoLicenseResolver(
"",
CatalogerConfig{
SearchLocalModCacheLicenses: true,
LocalModCacheDir: path.Join(wd, "test-fixtures", "licenses", "pkg", "mod"),
LocalModCacheDir: filepath.Join(wd, "test-fixtures", "licenses", "pkg", "mod"),
},
)
licenses, err := l.getLicenses(fileresolver.Empty{}, test.name, test.version)
@ -97,7 +101,7 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
testDir := path.Join(wd, "test-fixtures", "licenses", "pkg", "mod", processCaps(modPath)+"@"+modVersion)
testDir := filepath.Join(wd, "test-fixtures", "licenses", "pkg", "mod", processCaps(modPath)+"@"+modVersion)
archive := zip.NewWriter(buf)
@ -106,9 +110,9 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
for _, f := range entries {
// the zip files downloaded contain a path to the repo that somewhat matches where it ends up on disk,
// so prefix entries with something similar
writer, err := archive.Create(path.Join("github.com/something/some@version", f.Name()))
writer, err := archive.Create(path.Join(moduleDir(modPath, modVersion), f.Name()))
require.NoError(t, err)
contents, err := os.ReadFile(path.Join(testDir, f.Name()))
contents, err := os.ReadFile(filepath.Join(testDir, f.Name()))
require.NoError(t, err)
_, err = writer.Write(contents)
require.NoError(t, err)
@ -136,7 +140,8 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
Value: "Apache-2.0",
SPDXExpression: "Apache-2.0",
Type: license.Concluded,
Locations: file.NewLocationSet(loc1),
URLs: []string{server.URL + "/github.com/someorg/somename/@v/v0.3.2.zip#" + loc1.RealPath},
Locations: file.NewLocationSet(),
},
},
{
@ -146,21 +151,20 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Concluded,
Locations: file.NewLocationSet(loc2),
URLs: []string{server.URL + "/github.com/CapORG/CapProject/@v/v4.111.5.zip#" + loc2.RealPath},
Locations: file.NewLocationSet(),
},
},
}
modDir := path.Join(t.TempDir())
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
l := newGoLicenses(
l := newGoLicenseResolver(
"",
CatalogerConfig{
SearchRemoteLicenses: true,
Proxies: []string{server.URL},
LocalModCacheDir: modDir,
},
)

View file

@ -15,7 +15,7 @@ func (c *goBinaryCataloger) newGoBinaryPackage(resolver file.Resolver, dep *debu
dep = dep.Replace
}
licenses, err := c.licenses.getLicenses(resolver, dep.Path, dep.Version)
licenses, err := c.licenseResolver.getLicenses(resolver, dep.Path, dep.Version)
if err != nil {
log.Tracef("error getting licenses for golang package: %s %v", dep.Path, err)
}

View file

@ -46,13 +46,13 @@ var (
const devel = "(devel)"
type goBinaryCataloger struct {
licenses goLicenses
licenseResolver goLicenseResolver
mainModuleVersion MainModuleVersionConfig
}
func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
return &goBinaryCataloger{
licenses: newGoLicenses(binaryCatalogerName, opts),
licenseResolver: newGoLicenseResolver(binaryCatalogerName, opts),
mainModuleVersion: opts.MainModuleVersion,
}
}

View file

@ -19,7 +19,13 @@ import (
)
type goModCataloger struct {
licenses goLicenses
licenseResolver goLicenseResolver
}
func newGoModCataloger(opts CatalogerConfig) *goModCataloger {
return &goModCataloger{
licenseResolver: newGoLicenseResolver(modFileCatalogerName, opts),
}
}
// parseGoModFile takes a go.mod and lists all packages discovered.
@ -44,7 +50,7 @@ func (c *goModCataloger) parseGoModFile(_ context.Context, resolver file.Resolve
}
for _, m := range f.Require {
licenses, err := c.licenses.getLicenses(resolver, m.Mod.Path, m.Mod.Version)
licenses, err := c.licenseResolver.getLicenses(resolver, m.Mod.Path, m.Mod.Version)
if err != nil {
log.Tracef("error getting licenses for package: %s %v", m.Mod.Path, err)
}
@ -65,7 +71,7 @@ func (c *goModCataloger) parseGoModFile(_ context.Context, resolver file.Resolve
// remove any old packages and replace with new ones...
for _, m := range f.Replace {
licenses, err := c.licenses.getLicenses(resolver, m.New.Path, m.New.Version)
licenses, err := c.licenseResolver.getLicenses(resolver, m.New.Path, m.New.Version)
if err != nil {
log.Tracef("error getting licenses for package: %s %v", m.New.Path, err)
}

View file

@ -4,6 +4,7 @@ import (
"testing"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
@ -91,10 +92,11 @@ func TestParseGoMod(t *testing.T) {
for _, test := range tests {
t.Run(test.fixture, func(t *testing.T) {
c := goModCataloger{}
c := newGoModCataloger(DefaultCatalogerConfig())
pkgtest.NewCatalogTester().
FromFile(t, test.fixture).
Expects(test.expected, nil).
WithResolver(fileresolver.Empty{}).
TestParser(t, c.parseGoModFile)
})
}

View file

@ -0,0 +1,33 @@
package directorysource
import (
"os"
"strings"
"github.com/anchore/syft/internal/cache"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/internal/fileresolver"
)
// we do not want to cache things and then subsequently scan them, if, for example a user runs `syft /` twice
func excludeCachePathVisitors() []fileresolver.PathIndexVisitor {
var out []fileresolver.PathIndexVisitor
for _, dir := range cache.GetManager().RootDirs() {
out = append(out, excludeCacheDirPathVisitor{
dir: dir,
}.excludeCacheDir)
}
return out
}
type excludeCacheDirPathVisitor struct {
dir string
}
func (d excludeCacheDirPathVisitor) excludeCacheDir(_, path string, _ os.FileInfo, _ error) error {
if strings.HasPrefix(path, d.dir) {
log.Tracef("skipping cache path: %s", path)
return fileresolver.ErrSkipPath
}
return nil
}

View file

@ -142,6 +142,9 @@ func (s *directorySource) FileResolver(_ source.Scope) (file.Resolver, error) {
return nil, err
}
// this should be the only file resolver that might have overlap with where files are cached
exclusionFunctions = append(exclusionFunctions, excludeCachePathVisitors()...)
res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)