ignore target link files based on path (#667)

* ignore target link files based on path

log when files are actually indexed

add test for sym link resolution

golang test nits

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* nil catalog should act like an empty catalog

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* remove dir path filtering in favor of file type filtering

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* split out addPathToIndex into specialized functions

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add test for nul catalog enumeration

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* conditionally discover MIME types for file based on file resolver index

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* change logging around cataloging

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add tests to cover possible infinite symlink loop for resolver

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

Co-authored-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Jonas Galvão Xavier 2021-12-13 17:49:11 -08:00 committed by GitHub
parent 3a10ca7e6a
commit d3804d1a82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 623 additions and 225 deletions

View file

@ -18,7 +18,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
{
name: "positive-libpython3.7.so",
fixtureDir: "test-fixtures/classifiers/positive",
location: "test-fixtures/classifiers/positive/libpython3.7.so",
location: "libpython3.7.so",
expected: []Classification{
{
Class: "python-binary",
@ -32,7 +32,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
{
name: "positive-python3.6",
fixtureDir: "test-fixtures/classifiers/positive",
location: "test-fixtures/classifiers/positive/python3.6",
location: "python3.6",
expected: []Classification{
{
Class: "python-binary",
@ -46,7 +46,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
{
name: "positive-patchlevel.h",
fixtureDir: "test-fixtures/classifiers/positive",
location: "test-fixtures/classifiers/positive/patchlevel.h",
location: "patchlevel.h",
expected: []Classification{
{
Class: "cpython-source",
@ -60,7 +60,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
{
name: "positive-go",
fixtureDir: "test-fixtures/classifiers/positive",
location: "test-fixtures/classifiers/positive/go",
location: "go",
expected: []Classification{
{
Class: "go-binary",
@ -74,7 +74,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
{
name: "positive-go-hint",
fixtureDir: "test-fixtures/classifiers/positive",
location: "test-fixtures/classifiers/positive/VERSION",
location: "VERSION",
expected: []Classification{
{
Class: "go-binary-hint",
@ -88,7 +88,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
{
name: "positive-busybox",
fixtureDir: "test-fixtures/classifiers/positive",
location: "test-fixtures/classifiers/positive/busybox",
location: "busybox",
expected: []Classification{
{
Class: "busybox-binary",

View file

@ -103,6 +103,10 @@ func (c *Catalog) Enumerate(types ...Type) <-chan Package {
channel := make(chan Package)
go func() {
defer close(channel)
if c == nil {
// we should allow enumerating from a catalog that was never created (which will result in no packages enumerated)
return
}
for ty, ids := range c.idsByType {
if len(types) != 0 {
found := false

View file

@ -3,6 +3,8 @@ package pkg
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/source"
@ -144,3 +146,8 @@ func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) {
}
}
func TestCatalog_EnumerateNilCatalog(t *testing.T) {
var c *Catalog
assert.Empty(t, c.Enumerate())
}

View file

@ -51,6 +51,7 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers
var errs error
for _, theCataloger := range catalogers {
// find packages from the underlying raw data
log.Debugf("cataloging with %q", theCataloger.Name())
packages, relationships, err := theCataloger.Catalog(resolver)
if err != nil {
errs = multierror.Append(errs, err)
@ -59,7 +60,7 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers
catalogedPackages := len(packages)
log.Debugf("package cataloger %q discovered %d packages", theCataloger.Name(), catalogedPackages)
log.Debugf("discovered %d packages", catalogedPackages)
packagesDiscovered.N += int64(catalogedPackages)
for _, p := range packages {

View file

@ -208,7 +208,14 @@ func (p PackageJSON) hasNameAndVersionValues() bool {
return p.Name != "" && p.Version != ""
}
func pathContainsNodeModulesDirectory(path string) bool {
isNodeModulesPath, _ := regexp.MatchString("[\\/]node_modules[\\/]", path)
return isNodeModulesPath
// this supports both windows and unix paths
var filepathSeparator = regexp.MustCompile(`[\\/]`)
func pathContainsNodeModulesDirectory(p string) bool {
for _, subPath := range filepathSeparator.Split(p, -1) {
if subPath == "node_modules" {
return true
}
}
return false
}

View file

@ -6,6 +6,7 @@ import (
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
"github.com/stretchr/testify/assert"
)
func TestParsePackageJSON(t *testing.T) {
@ -160,3 +161,50 @@ func TestParsePackageJSON_Partial(t *testing.T) { // see https://github.com/anch
t.Errorf("no packages should've been returned (but got %d packages)", actualCount)
}
}
func Test_pathContainsNodeModulesDirectory(t *testing.T) {
tests := []struct {
path string
expected bool
}{
// positive
{
path: "something/node_modules/package",
expected: true,
},
{
path: "node_modules/package",
expected: true,
},
{
path: "something/node_modules",
expected: true,
},
{
path: "\\something\\node_modules\\",
expected: true,
},
{
path: "\\something\\node_modules",
expected: true,
},
// negative
{
path: "something/node_bogus_modules",
expected: false,
},
{
path: "something/node_modules_bogus",
expected: false,
},
{
path: "something/node_bogus_modules/package",
expected: false,
},
}
for _, test := range tests {
t.Run(test.path, func(t *testing.T) {
assert.Equal(t, test.expected, pathContainsNodeModulesDirectory(test.path))
})
}
}

View file

@ -9,9 +9,10 @@ import (
"path/filepath"
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/event"
@ -21,23 +22,21 @@ import (
var unixSystemRuntimePrefixes = []string{
"/proc",
"/sys",
"/dev",
"/sys",
}
var ErrIgnoreIrregularFile = errors.New("ignoring irregular file type")
var _ FileResolver = (*directoryResolver)(nil)
type pathFilterFn func(string) bool
type pathFilterFn func(string, os.FileInfo) bool
// directoryResolver implements path and content access for the directory data source.
type directoryResolver struct {
path string
cwdRelativeToRoot string
cwd string
fileTree *filetree.FileTree
metadata map[file.ID]FileMetadata
path string
currentWdRelativeToRoot string
currentWd string
fileTree *filetree.FileTree
metadata map[file.ID]FileMetadata
// TODO: wire up to report these paths in the json report
pathFilterFns []pathFilterFn
refsByMIMEType map[string][]file.Reference
@ -45,39 +44,41 @@ type directoryResolver struct {
}
func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) {
cwd, err := os.Getwd()
currentWd, err := os.Getwd()
if err != nil {
return nil, fmt.Errorf("could not create directory resolver: %w", err)
}
var cwdRelRoot string
var currentWdRelRoot string
if path.IsAbs(root) {
cwdRelRoot, err = filepath.Rel(cwd, root)
currentWdRelRoot, err = filepath.Rel(currentWd, root)
if err != nil {
return nil, fmt.Errorf("could not create directory resolver: %w", err)
}
} else {
currentWdRelRoot = filepath.Clean(root)
}
if pathFilters == nil {
pathFilters = []pathFilterFn{isUnixSystemRuntimePath}
pathFilters = []pathFilterFn{isUnallowableFileType, isUnixSystemRuntimePath}
}
resolver := directoryResolver{
path: root,
cwd: cwd,
cwdRelativeToRoot: cwdRelRoot,
fileTree: filetree.NewFileTree(),
metadata: make(map[file.ID]FileMetadata),
pathFilterFns: pathFilters,
refsByMIMEType: make(map[string][]file.Reference),
errPaths: make(map[string]error),
path: root,
currentWd: currentWd,
currentWdRelativeToRoot: currentWdRelRoot,
fileTree: filetree.NewFileTree(),
metadata: make(map[file.ID]FileMetadata),
pathFilterFns: pathFilters,
refsByMIMEType: make(map[string][]file.Reference),
errPaths: make(map[string]error),
}
return &resolver, indexAllRoots(root, resolver.indexTree)
}
func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]string, error) {
log.Infof("indexing filesystem path=%q", root)
log.Debugf("indexing filesystem path=%q", root)
var roots []string
var err error
@ -93,11 +94,8 @@ func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]st
// but continue forth with index regardless if the given root path exists or not.
fi, err := os.Stat(root)
if err != nil && fi != nil && !fi.IsDir() {
newRoot, err := r.addPathToIndex(root, fi)
if err = r.handleFileAccessErr(root, err); err != nil {
return nil, fmt.Errorf("unable to index path: %w", err)
}
// note: we want to index the path regardless of an error stat-ing the path
newRoot := r.indexPath(root, fi, nil)
if newRoot != "" {
roots = append(roots, newRoot)
}
@ -108,102 +106,133 @@ func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]st
func(path string, info os.FileInfo, err error) error {
stager.Current = path
newRoot, indexErr := r.indexPath(path, info, err)
newRoot := r.indexPath(path, info, err)
if newRoot != "" {
roots = append(roots, newRoot)
}
return indexErr
return nil
})
}
func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) (string, error) {
func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) string {
// ignore any path which a filter function returns true
for _, filterFn := range r.pathFilterFns {
if filterFn(path) {
return "", nil
if filterFn(path, info) {
return ""
}
}
if err = r.handleFileAccessErr(path, err); err != nil {
return "", err
if r.isFileAccessErr(path, err) {
return ""
}
// link cycles could cause a revisit --we should not allow this
if r.fileTree.HasPath(file.Path(path)) {
return "", nil
return ""
}
if info == nil {
// walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue.
r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path)
return "", nil
return ""
}
newRoot, err := r.addPathToIndex(path, info)
if err = r.handleFileAccessErr(path, err); err != nil {
return "", fmt.Errorf("unable to index path: %w", err)
if r.isFileAccessErr(path, err) {
return ""
}
return newRoot, nil
return newRoot
}
func (r *directoryResolver) handleFileAccessErr(path string, err error) error {
if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) || errors.Is(err, ErrIgnoreIrregularFile) {
// don't allow for permission errors to stop indexing, keep track of the paths and continue.
func (r *directoryResolver) isFileAccessErr(path string, err error) bool {
// don't allow for errors to stop indexing, keep track of the paths and continue.
if err != nil {
log.Warnf("unable to access path=%q: %+v", path, err)
r.errPaths[path] = err
return nil
} else if err != nil {
return fmt.Errorf("unable to access path=%q: %w", path, err)
return true
}
return nil
return false
}
func (r directoryResolver) addPathToIndex(p string, info os.FileInfo) (string, error) {
var ref *file.Reference
var err error
var newRoot string
switch newFileTypeFromMode(info.Mode()) {
switch t := newFileTypeFromMode(info.Mode()); t {
case SymbolicLink:
linkTarget, err := os.Readlink(p)
if err != nil {
return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
}
ref, err = r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget))
if err != nil {
return "", err
}
targetAbsPath := linkTarget
if !filepath.IsAbs(targetAbsPath) {
targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget))
}
newRoot = targetAbsPath
return r.addSymlinkToIndex(p, info)
case Directory:
ref, err = r.fileTree.AddDir(file.Path(p))
if err != nil {
return "", err
}
case IrregularFile:
return "", ErrIgnoreIrregularFile
return "", r.addDirectoryToIndex(p, info)
case RegularFile:
return "", r.addFileToIndex(p, info)
default:
ref, err = r.fileTree.AddFile(file.Path(p))
if err != nil {
return "", err
}
return "", fmt.Errorf("unsupported file type: %s", t)
}
}
func (r directoryResolver) addDirectoryToIndex(p string, info os.FileInfo) error {
ref, err := r.fileTree.AddDir(file.Path(p))
if err != nil {
return err
}
metadata := fileMetadataFromPath(p, info)
location := NewLocationFromDirectory(p, *ref)
metadata := fileMetadataFromPath(p, info, r.isInIndex(location))
r.addFileMetadataToIndex(ref, metadata)
return nil
}
func (r directoryResolver) addFileToIndex(p string, info os.FileInfo) error {
ref, err := r.fileTree.AddFile(file.Path(p))
if err != nil {
return err
}
location := NewLocationFromDirectory(p, *ref)
metadata := fileMetadataFromPath(p, info, r.isInIndex(location))
r.addFileMetadataToIndex(ref, metadata)
return nil
}
func (r directoryResolver) addSymlinkToIndex(p string, info os.FileInfo) (string, error) {
var usedInfo = info
linkTarget, err := os.Readlink(p)
if err != nil {
return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
}
// note: if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to resolve it relative to the directory
// in question (e.g. resolve to /dev/fd/2)
if !filepath.IsAbs(linkTarget) {
linkTarget = filepath.Join(filepath.Dir(p), linkTarget)
}
ref, err := r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget))
if err != nil {
return "", err
}
targetAbsPath := linkTarget
if !filepath.IsAbs(targetAbsPath) {
targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget))
}
location := NewLocationFromDirectory(p, *ref)
metadata := fileMetadataFromPath(p, usedInfo, r.isInIndex(location))
r.addFileMetadataToIndex(ref, metadata)
return targetAbsPath, nil
}
func (r directoryResolver) addFileMetadataToIndex(ref *file.Reference, metadata FileMetadata) {
if ref != nil {
r.refsByMIMEType[metadata.MIMEType] = append(r.refsByMIMEType[metadata.MIMEType], *ref)
if metadata.MIMEType != "" {
r.refsByMIMEType[metadata.MIMEType] = append(r.refsByMIMEType[metadata.MIMEType], *ref)
}
r.metadata[ref.ID()] = metadata
}
r.metadata[ref.ID()] = metadata
return newRoot, nil
}
func (r directoryResolver) requestPath(userPath string) (string, error) {
@ -212,7 +241,7 @@ func (r directoryResolver) requestPath(userPath string) (string, error) {
userPath = path.Join(r.path, userPath)
} else {
// ensure we take into account any relative difference between the root path and the CWD for relative requests
userPath = path.Join(r.cwdRelativeToRoot, userPath)
userPath = path.Join(r.currentWdRelativeToRoot, userPath)
}
var err error
@ -227,7 +256,7 @@ func (r directoryResolver) responsePath(path string) string {
// always return references relative to the request path (not absolute path)
if filepath.IsAbs(path) {
// we need to account for the cwd relative to the running process and the given root for the directory resolver
prefix := filepath.Clean(filepath.Join(r.cwd, r.cwdRelativeToRoot))
prefix := filepath.Clean(filepath.Join(r.currentWd, r.currentWdRelativeToRoot))
return strings.TrimPrefix(path, prefix+string(filepath.Separator))
}
return path
@ -283,8 +312,6 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error)
exists, ref, err := r.fileTree.File(file.Path(userStrPath))
if err == nil && exists {
references = append(references, NewLocationFromDirectory(r.responsePath(userStrPath), *ref))
} else {
log.Warnf("path (%s) not found in file tree: Exists: %t Err:%+v", userStrPath, exists, err)
}
}
@ -329,9 +356,22 @@ func (r directoryResolver) FileContentsByLocation(location Location) (io.ReadClo
if location.ref.RealPath == "" {
return nil, errors.New("empty path given")
}
if !r.isInIndex(location) {
// this is in cases where paths have been explicitly excluded from the tree index. In which case
// we should DENY all content requests. Why? These paths have been indicated to be inaccessible (either
// by preference or these files are not readable by the current user).
return nil, fmt.Errorf("file content is inaccessible path=%q", location.ref.RealPath)
}
return file.NewLazyReadCloser(string(location.ref.RealPath)), nil
}
func (r directoryResolver) isInIndex(location Location) bool {
if location.ref.RealPath == "" {
return false
}
return r.fileTree.HasPath(location.ref.RealPath, filetree.FollowBasenameLinks)
}
func (r *directoryResolver) AllLocations() <-chan Location {
results := make(chan Location)
go func() {
@ -364,10 +404,25 @@ func (r *directoryResolver) FilesByMIMEType(types ...string) ([]Location, error)
return locations, nil
}
func isUnixSystemRuntimePath(path string) bool {
func isUnixSystemRuntimePath(path string, _ os.FileInfo) bool {
return internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...)
}
func isUnallowableFileType(_ string, info os.FileInfo) bool {
if info == nil {
// we can't filter out by filetype for non-existent files
return false
}
switch newFileTypeFromMode(info.Mode()) {
case CharacterDevice, Socket, BlockDevice, FIFONode, IrregularFile:
return true
// note: symlinks that point to these files may still get by. We handle this later in processing to help prevent
// against infinite links traversal.
}
return false
}
func indexAllRoots(root string, indexer func(string, *progress.Stage) ([]string, error)) error {
// why account for multiple roots? To cover cases when there is a symlink that references above the root path,
// in which case we need to additionally index where the link resolves to. it's for this reason why the filetree

View file

@ -1,14 +1,15 @@
package source
import (
"io/fs"
"io/ioutil"
"os"
"path"
"path/filepath"
"reflect"
"strings"
"syscall"
"testing"
"time"
"github.com/stretchr/testify/require"
@ -19,6 +20,55 @@ import (
"github.com/wagoodman/go-progress"
)
func TestDirectoryResolver_FilesByPath_relativeRoot(t *testing.T) {
cases := []struct {
name string
relativeRoot string
input string
expected []string
}{
{
name: "should find a file from an absolute input",
relativeRoot: "./test-fixtures/",
input: "/image-symlinks/file-1.txt",
expected: []string{
"image-symlinks/file-1.txt",
},
},
{
name: "should find a file from a relative path",
relativeRoot: "./test-fixtures/",
input: "image-symlinks/file-1.txt",
expected: []string{
"image-symlinks/file-1.txt",
},
},
{
name: "should find a file from a relative path (root above cwd)",
relativeRoot: "../",
input: "sbom/sbom.go",
expected: []string{
"sbom/sbom.go",
},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
resolver, err := newDirectoryResolver(c.relativeRoot)
assert.NoError(t, err)
refs, err := resolver.FilesByPath(c.input)
require.NoError(t, err)
assert.Len(t, refs, len(c.expected))
s := strset.New()
for _, actual := range refs {
s.Add(actual.RealPath)
}
assert.ElementsMatch(t, c.expected, s.List())
})
}
}
func TestDirectoryResolver_FilesByPath_absoluteRoot(t *testing.T) {
cases := []struct {
name string
@ -85,23 +135,16 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) {
{
name: "finds a file (relative)",
root: "./test-fixtures/",
input: "test-fixtures/image-symlinks/file-1.txt",
expected: "test-fixtures/image-symlinks/file-1.txt",
input: "image-symlinks/file-1.txt",
expected: "image-symlinks/file-1.txt",
refCount: 1,
},
{
name: "finds a file with relative indirection",
root: "./test-fixtures/../test-fixtures",
input: "test-fixtures/image-symlinks/file-1.txt",
expected: "test-fixtures/image-symlinks/file-1.txt",
refCount: 1,
},
{
// note: this is asserting the old behavior is not supported
name: "relative lookup with wrong path fails",
root: "./test-fixtures/",
input: "image-symlinks/file-1.txt",
refCount: 0,
expected: "image-symlinks/file-1.txt",
refCount: 1,
},
{
name: "managed non-existing files (relative)",
@ -113,7 +156,7 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) {
name: "finds a file (absolute)",
root: "./test-fixtures/",
input: "/image-symlinks/file-1.txt",
expected: "test-fixtures/image-symlinks/file-1.txt",
expected: "image-symlinks/file-1.txt",
refCount: 1,
},
{
@ -158,17 +201,17 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) {
}{
{
name: "finds multiple files",
input: []string{"test-fixtures/image-symlinks/file-1.txt", "test-fixtures/image-symlinks/file-2.txt"},
input: []string{"image-symlinks/file-1.txt", "image-symlinks/file-2.txt"},
refCount: 2,
},
{
name: "skips non-existing files",
input: []string{"test-fixtures/image-symlinks/bogus.txt", "test-fixtures/image-symlinks/file-1.txt"},
input: []string{"image-symlinks/bogus.txt", "image-symlinks/file-1.txt"},
refCount: 1,
},
{
name: "does not return anything for non-existing directories",
input: []string{"test-fixtures/non-existing/bogus.txt", "test-fixtures/non-existing/file-1.txt"},
input: []string{"non-existing/bogus.txt", "non-existing/file-1.txt"},
refCount: 0,
},
}
@ -210,7 +253,7 @@ func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) {
assert.NoError(t, err)
assert.Len(t, refs, 1)
assert.Equal(t, "test-fixtures/image-symlinks/file-1.txt", refs[0].RealPath)
assert.Equal(t, "image-symlinks/file-1.txt", refs[0].RealPath)
}
func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) {
@ -218,95 +261,128 @@ func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) {
resolver, err := newDirectoryResolver("test-fixtures/system_paths/target")
assert.NoError(t, err)
// ensure the correct filter function is wired up by default
expectedFn := reflect.ValueOf(isUnixSystemRuntimePath)
expectedFn := reflect.ValueOf(isUnallowableFileType)
actualFn := reflect.ValueOf(resolver.pathFilterFns[0])
assert.Equal(t, expectedFn.Pointer(), actualFn.Pointer())
// all paths should be found (non filtering matches a path)
refs, err := resolver.FilesByGlob("**/place")
locations, err := resolver.FilesByGlob("**/place")
assert.NoError(t, err)
// 4: within target/
// 1: target/link --> relative path to "place"
// 1: outside_root/link_target/place
assert.Len(t, refs, 6)
assert.Len(t, locations, 6)
// ensure that symlink indexing outside of root worked
testLocation := "test-fixtures/system_paths/outside_root/link_target/place"
ok := false
test_location := "test-fixtures/system_paths/outside_root/link_target/place"
for _, actual_loc := range refs {
if test_location == actual_loc.RealPath {
for _, location := range locations {
if strings.HasSuffix(location.RealPath, testLocation) {
ok = true
}
}
if !ok {
t.Fatalf("could not find test location=%q", test_location)
t.Fatalf("could not find test location=%q", testLocation)
}
}
func TestDirectoryResolverUsesPathFilterFunction(t *testing.T) {
// let's make certain that the index honors the filter function
filter := func(s string) bool {
// a dummy function that works for testing purposes
return strings.Contains(s, "dev/place") || strings.Contains(s, "proc/place") || strings.Contains(s, "sys/place")
}
var _ fs.FileInfo = (*testFileInfo)(nil)
resolver, err := newDirectoryResolver("test-fixtures/system_paths/target", filter)
assert.NoError(t, err)
// ensure the correct filter function is wired up by default
expectedFn := reflect.ValueOf(filter)
actualFn := reflect.ValueOf(resolver.pathFilterFns[0])
assert.Equal(t, expectedFn.Pointer(), actualFn.Pointer())
assert.Len(t, resolver.pathFilterFns, 1)
refs, err := resolver.FilesByGlob("**/place")
assert.NoError(t, err)
// target/home/place + target/link/.../place + outside_root/.../place
assert.Len(t, refs, 3)
type testFileInfo struct {
mode os.FileMode
}
func Test_isUnixSystemRuntimePath(t *testing.T) {
func (t testFileInfo) Name() string {
panic("implement me")
}
func (t testFileInfo) Size() int64 {
panic("implement me")
}
func (t testFileInfo) Mode() fs.FileMode {
return t.mode
}
func (t testFileInfo) ModTime() time.Time {
panic("implement me")
}
func (t testFileInfo) IsDir() bool {
panic("implement me")
}
func (t testFileInfo) Sys() interface{} {
panic("implement me")
}
func Test_isUnallowableFileType(t *testing.T) {
tests := []struct {
path string
name string
info os.FileInfo
expected bool
}{
{
path: "proc/place",
name: "regular file",
info: testFileInfo{
mode: 0,
},
expected: false,
},
{
path: "/proc/place",
expected: true,
},
{
path: "/proc",
expected: true,
},
{
path: "/pro/c",
name: "dir",
info: testFileInfo{
mode: os.ModeDir,
},
expected: false,
},
{
path: "/pro",
name: "symlink",
info: testFileInfo{
mode: os.ModeSymlink,
},
expected: false,
},
{
path: "/dev",
name: "socket",
info: testFileInfo{
mode: os.ModeSocket,
},
expected: true,
},
{
path: "/sys",
name: "named pipe",
info: testFileInfo{
mode: os.ModeNamedPipe,
},
expected: true,
},
{
path: "/something/sys",
expected: false,
name: "char device",
info: testFileInfo{
mode: os.ModeCharDevice,
},
expected: true,
},
{
name: "block device",
info: testFileInfo{
mode: os.ModeDevice,
},
expected: true,
},
{
name: "irregular",
info: testFileInfo{
mode: os.ModeIrregular,
},
expected: true,
},
}
for _, test := range tests {
t.Run(test.path, func(t *testing.T) {
assert.Equal(t, test.expected, isUnixSystemRuntimePath(test.path))
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, isUnallowableFileType("dont/care", test.info))
})
}
}
@ -345,9 +421,7 @@ func Test_directoryResolver_index(t *testing.T) {
// note: the index uses absolute paths, so assertions MUST keep this in mind
cwd, err := os.Getwd()
if err != nil {
t.Fatalf("could not get working dir: %+v", err)
}
require.NoError(t, err)
p := file.Path(path.Join(cwd, test.path))
assert.Equal(t, true, r.fileTree.HasPath(p))
@ -365,32 +439,27 @@ func Test_handleFileAccessErr(t *testing.T) {
tests := []struct {
name string
input error
expectedErr error
expectedPathTracked bool
}{
{
name: "permission error does not propagate",
input: os.ErrPermission,
expectedPathTracked: true,
expectedErr: nil,
},
{
name: "file does not exist error does not propagate",
input: os.ErrNotExist,
expectedPathTracked: true,
expectedErr: nil,
},
{
name: "non-permission errors propagate",
name: "non-permission errors are tracked",
input: os.ErrInvalid,
expectedPathTracked: false,
expectedErr: os.ErrInvalid,
expectedPathTracked: true,
},
{
name: "non-errors ignored",
input: nil,
expectedPathTracked: false,
expectedErr: nil,
},
}
@ -400,7 +469,7 @@ func Test_handleFileAccessErr(t *testing.T) {
errPaths: make(map[string]error),
}
p := "a/place"
assert.ErrorIs(t, r.handleFileAccessErr(p, test.input), test.expectedErr)
assert.Equal(t, r.isFileAccessErr(p, test.input), test.expectedPathTracked)
_, exists := r.errPaths[p]
assert.Equal(t, test.expectedPathTracked, exists)
})
@ -493,7 +562,7 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) {
{
fixturePath: "./test-fixtures/image-simple",
mimeType: "text/plain",
expectedPaths: strset.New("test-fixtures/image-simple/file-1.txt", "test-fixtures/image-simple/file-2.txt", "test-fixtures/image-simple/target/really/nested/file-3.txt", "test-fixtures/image-simple/Dockerfile"),
expectedPaths: strset.New("file-1.txt", "file-2.txt", "target/really/nested/file-3.txt", "Dockerfile"),
},
}
for _, test := range tests {
@ -511,33 +580,81 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) {
}
}
func Test_ignoreIrregularFiles(t *testing.T) {
// NOTE: craeting a pipe/fifo file on demand since git doesn't let me
// commit one. The directory resolver should ignore it (and any other
// file that is neither regular, a symlink or directory) when indexing a directory
dir := "./test-fixtures/irregular-files"
f := "f.fifo"
err := syscall.Mknod(filepath.Join(dir, f), syscall.S_IFIFO|0666, 0)
assert.NoError(t, err)
defer func() {
err := os.Remove(filepath.Join(dir, f))
assert.NoError(t, err)
}()
fileRefs, err := ioutil.ReadDir(dir)
assert.NoError(t, err)
assert.Len(t, fileRefs, 2) // two files (f.fifo and readme)
resolver, err := newDirectoryResolver(dir)
func Test_IndexingNestedSymLinks(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple")
assert.NoError(t, err)
assert.Len(t, resolver.fileTree.AllFiles(), 1)
rp := resolver.fileTree.AllFiles()[0].RealPath
assert.True(t, strings.Contains(string(rp), filepath.Join(dir, "readme")))
// check that we can get the real path
locations, err := resolver.FilesByPath("./readme")
require.NoError(t, err)
assert.Len(t, locations, 1)
// check that we can access the same file via 1 symlink
locations, err = resolver.FilesByPath("./link_to_new_readme")
require.NoError(t, err)
assert.Len(t, locations, 1)
// check that we can access the same file via 2 symlinks
locations, err = resolver.FilesByPath("./link_to_link_to_new_readme")
require.NoError(t, err)
assert.Len(t, locations, 1)
}
func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) {
filterFn := func(path string, _ os.FileInfo) bool {
return strings.HasSuffix(path, string(filepath.Separator)+"readme")
}
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", filterFn)
assert.NoError(t, err)
var testingLocations []Location
// the path to the real file is PRUNED from the index, so we should NOT expect a location returned
locations, err := resolver.FilesByPath("./readme")
require.NoError(t, err)
assert.Empty(t, locations)
// check that we can access the same file via 1 symlink
locations, err = resolver.FilesByPath("./link_to_new_readme")
require.NoError(t, err)
assert.Len(t, locations, 1)
testingLocations = append(testingLocations, locations...)
// check that we can access the same file via 2 symlinks
locations, err = resolver.FilesByPath("./link_to_link_to_new_readme")
require.NoError(t, err)
assert.Len(t, locations, 1)
testingLocations = append(testingLocations, locations...)
// check that we CANNOT get contents from any of the link locations
for _, location := range testingLocations {
contentReader, err := resolver.FileContentsByLocation(location)
assert.Errorf(t, err, "expected an error for getting content from a location not in the index")
assert.Nil(t, contentReader)
}
}
func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-roots/root")
assert.NoError(t, err)
// check that we can get the real path
locations, err := resolver.FilesByPath("./readme")
require.NoError(t, err)
assert.Len(t, locations, 1)
// check that we can access the same file via 2 symlinks (link_to_link_to_readme -> link_to_readme -> readme)
locations, err = resolver.FilesByPath("./link_to_link_to_readme")
require.NoError(t, err)
assert.Len(t, locations, 1)
}
func Test_directoryResolver_FileContentsByLocation(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
tests := []struct {
name string
location Location
@ -547,7 +664,7 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) {
{
name: "use file reference for content requests",
location: NewLocationFromDirectory("some/place", file.Reference{
RealPath: "test-fixtures/image-simple/file-1.txt",
RealPath: file.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt")),
}),
expects: "this file has contents",
},
@ -578,3 +695,79 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) {
})
}
}
func Test_isUnixSystemRuntimePath(t *testing.T) {
tests := []struct {
path string
expected bool
}{
{
path: "proc/place",
expected: false,
},
{
path: "/proc/place",
expected: true,
},
{
path: "/proc",
expected: true,
},
{
path: "/pro/c",
expected: false,
},
{
path: "/pro",
expected: false,
},
{
path: "/dev",
expected: true,
},
{
path: "/sys",
expected: true,
},
{
path: "/something/sys",
expected: false,
},
}
for _, test := range tests {
t.Run(test.path, func(t *testing.T) {
assert.Equal(t, test.expected, isUnixSystemRuntimePath(test.path, nil))
})
}
}
func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) {
test := func(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop")
require.NoError(t, err)
locations, err := resolver.FilesByGlob("**/file.target")
require.NoError(t, err)
// Note: I'm not certain that this behavior is correct, but it is not an infinite loop (which is the point of the test)
// - block/loop0/file.target
// - devices/loop0/file.target
// - devices/loop0/subsystem/loop0/file.target
assert.Len(t, locations, 3)
}
testWithTimeout(t, 5*time.Second, test)
}
func testWithTimeout(t *testing.T, timeout time.Duration, test func(*testing.T)) {
done := make(chan bool)
go func() {
test(t)
done <- true
}()
select {
case <-time.After(timeout):
t.Fatal("test timed out")
case <-done:
}
}

View file

@ -3,10 +3,10 @@ package source
import (
"os"
"github.com/anchore/syft/internal/log"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/stereoscope/pkg/image"
)
@ -37,19 +37,24 @@ func fileMetadataByLocation(img *image.Image, location Location) (FileMetadata,
}, nil
}
func fileMetadataFromPath(path string, info os.FileInfo) FileMetadata {
func fileMetadataFromPath(path string, info os.FileInfo, withMIMEType bool) FileMetadata {
var mimeType string
uid, gid := GetXid(info)
f, err := os.Open(path)
if err != nil {
// TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns
f = nil
} else {
defer func() {
if err := f.Close(); err != nil {
log.Warnf("unable to close file while obtaining metadata: %s", path)
}
}()
if withMIMEType {
f, err := os.Open(path)
if err != nil {
// TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns
f = nil
} else {
defer func() {
if err := f.Close(); err != nil {
log.Warnf("unable to close file while obtaining metadata: %s", path)
}
}()
}
mimeType = file.MIMEType(f)
}
return FileMetadata{
@ -58,6 +63,6 @@ func fileMetadataFromPath(path string, info os.FileInfo) FileMetadata {
// unsupported across platforms
UserID: uid,
GroupID: gid,
MIMEType: file.MIMEType(f),
MIMEType: mimeType,
}
}

View file

@ -0,0 +1,54 @@
package source
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func Test_fileMetadataFromPath(t *testing.T) {
tests := []struct {
path string
withMIMEType bool
expectedType string
expectedMIMEType string
}{
{
path: "test-fixtures/symlinks-simple/readme",
withMIMEType: true,
expectedType: "RegularFile",
expectedMIMEType: "text/plain",
},
{
path: "test-fixtures/symlinks-simple/link_to_new_readme",
withMIMEType: true,
expectedType: "SymbolicLink",
expectedMIMEType: "text/plain",
},
{
path: "test-fixtures/symlinks-simple/readme",
withMIMEType: false,
expectedType: "RegularFile",
expectedMIMEType: "",
},
{
path: "test-fixtures/symlinks-simple/link_to_new_readme",
withMIMEType: false,
expectedType: "SymbolicLink",
expectedMIMEType: "",
},
}
for _, test := range tests {
t.Run(test.path, func(t *testing.T) {
info, err := os.Lstat(test.path)
require.NoError(t, err)
actual := fileMetadataFromPath(test.path, info, test.withMIMEType)
assert.Equal(t, test.expectedMIMEType, actual.MIMEType)
assert.Equal(t, test.expectedType, string(actual.Type))
})
}
}

View file

@ -6,8 +6,7 @@ import (
)
const (
UnknownFileType FileType = "UnknownFileType"
RegularFile FileType = "RegularFile"
RegularFile FileType = "RegularFile"
// IrregularFile is how syft defines files that are neither regular, symbolic or directory.
// For ref: the seven standard Unix file types are regular, directory, symbolic link,
// FIFO special, block special, character special, and socket as defined by POSIX.
@ -18,6 +17,7 @@ const (
BlockDevice FileType = "BlockDevice"
Directory FileType = "Directory"
FIFONode FileType = "FIFONode"
Socket FileType = "Socket"
)
type FileType string
@ -39,19 +39,32 @@ func newFileTypeFromTarHeaderTypeFlag(flag byte) FileType {
case tar.TypeFifo:
return FIFONode
}
return UnknownFileType
return IrregularFile
}
// TODO: fill in more types from mod...
func newFileTypeFromMode(mode os.FileMode) FileType {
switch {
case mode&os.ModeSymlink == os.ModeSymlink:
case isSet(mode, os.ModeSymlink):
return SymbolicLink
case isSet(mode, os.ModeIrregular):
return IrregularFile
case isSet(mode, os.ModeCharDevice):
return CharacterDevice
case isSet(mode, os.ModeDevice):
return BlockDevice
case isSet(mode, os.ModeNamedPipe):
return FIFONode
case isSet(mode, os.ModeSocket):
return Socket
case mode.IsDir():
return Directory
case !mode.IsRegular():
return IrregularFile
default:
case mode.IsRegular():
return RegularFile
default:
return IrregularFile
}
}
func isSet(mode, field os.FileMode) bool {
return mode&field != 0
}

View file

@ -55,19 +55,19 @@ func TestNewFromDirectory(t *testing.T) {
{
desc: "path detected",
input: "test-fixtures",
inputPaths: []string{"test-fixtures/path-detected/.vimrc"},
inputPaths: []string{"path-detected/.vimrc"},
expRefs: 1,
},
{
desc: "directory ignored",
input: "test-fixtures",
inputPaths: []string{"test-fixtures/path-detected"},
inputPaths: []string{"path-detected"},
expRefs: 0,
},
{
desc: "no files-by-path detected",
input: "test-fixtures",
inputPaths: []string{"test-fixtures/no-path-detected"},
inputPaths: []string{"no-path-detected"},
expRefs: 0,
},
}
@ -184,21 +184,21 @@ func TestNewFromDirectoryShared(t *testing.T) {
desc: "path detected",
input: "test-fixtures",
notExist: "foobar/",
inputPaths: []string{"test-fixtures/path-detected/.vimrc"},
inputPaths: []string{"path-detected/.vimrc"},
expRefs: 1,
},
{
desc: "directory ignored",
input: "test-fixtures",
notExist: "foobar/",
inputPaths: []string{"test-fixtures/path-detected"},
inputPaths: []string{"path-detected"},
expRefs: 0,
},
{
desc: "no files-by-path detected",
input: "test-fixtures",
notExist: "foobar/",
inputPaths: []string{"test-fixtures/no-path-detected"},
inputPaths: []string{"no-path-detected"},
expRefs: 0,
},
}
@ -306,7 +306,9 @@ func TestFilesByGlob(t *testing.T) {
t.Errorf("could not get resolver error: %+v", err)
}
contents, err := resolver.FilesByGlob(test.glob)
if err != nil {
t.Errorf("could not get files by glob: %s+v", err)
}
if len(contents) != test.expected {
t.Errorf("unexpected number of files found by glob (%s): %d != %d", test.glob, len(contents), test.expected)
}

View file

@ -0,0 +1 @@
this mimics a partial layout on a linux system within /sys/devices/virtual to help with ensuring globbing for files does not end up in a infinite loop

View file

@ -0,0 +1 @@
../devices/loop0

View file

@ -0,0 +1 @@
contents!

View file

@ -0,0 +1 @@
../../block

View file

@ -0,0 +1 @@
../root/readme

View file

@ -0,0 +1 @@
../outside/link_to_readme

View file

@ -0,0 +1 @@
contents!

View file

@ -0,0 +1 @@
./link_to_new_readme

View file

@ -0,0 +1 @@
./readme

View file

@ -24,7 +24,7 @@ func TestDirectoryScanCompletesWithinTimeout(t *testing.T) {
select {
case <-done:
break
case <-time.After(5 * time.Second):
case <-time.After(10 * time.Second):
t.Fatalf("directory scan is taking too long")
}