Merge pull request #308 from anchore/prefer-real-paths-on-glob-results

Prefer real paths over those with links for glob results
This commit is contained in:
Alex Goodman 2021-01-05 21:32:24 -05:00 committed by GitHub
commit 0f6288881b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 64 additions and 38 deletions

View file

@ -61,7 +61,7 @@ func TestPackageSbomToModel(t *testing.T) {
FoundBy: "foundBy",
Locations: []source.Location{
{
Path: "path",
RealPath: "path",
FileSystemID: "layerID",
},
},
@ -139,7 +139,7 @@ func TestPackageSbomImport(t *testing.T) {
FoundBy: "foundBy",
Locations: []source.Location{
{
Path: "path",
RealPath: "path",
FileSystemID: "layerID",
},
},

View file

@ -101,7 +101,7 @@ func (c *GenericCataloger) catalog(contents map[source.Location]io.ReadCloser) (
continue
}
entries, err := parser(location.Path, content)
entries, err := parser(location.RealPath, content)
if err != nil {
// TODO: should we fail? or only log?
log.Warnf("cataloger '%s' failed to parse entries (location=%+v): %+v", c.upstreamCataloger, location, err)

View file

@ -101,7 +101,7 @@ func TestGenericCataloger(t *testing.T) {
for _, p := range actualPkgs {
ref := p.Locations[0]
exP, ok := expectedPkgs[ref.Path]
exP, ok := expectedPkgs[ref.RealPath]
if !ok {
t.Errorf("missing expected pkg: ref=%+v", ref)
continue

View file

@ -48,7 +48,7 @@ func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
pkgs, err = parseDpkgStatus(dbContents)
if err != nil {
return nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.Path, err)
return nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err)
}
md5ContentsByName, md5RefsByName, err := fetchMd5Contents(resolver, dbLocation, pkgs)
@ -104,7 +104,7 @@ func fetchMd5Contents(resolver source.Resolver, dbLocation source.Location, pkgs
var md5FileMatches []source.Location
var nameByRef = make(map[source.Location]string)
parentPath := filepath.Dir(dbLocation.Path)
parentPath := filepath.Dir(dbLocation.RealPath)
for _, p := range pkgs {
// look for /var/lib/dpkg/info/NAME:ARCH.md5sums

View file

@ -79,7 +79,7 @@ func TestDpkgCataloger(t *testing.T) {
// we will test the sources separately
var sourcesList = make([]string, len(a.Locations))
for i, s := range a.Locations {
sourcesList[i] = s.Path
sourcesList[i] = s.RealPath
}
a.Locations = nil

View file

@ -38,7 +38,7 @@ func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, err
for _, entry := range entries {
p, err := c.catalogEggOrWheel(entry)
if err != nil {
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err)
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.RealPath, err)
}
if p != nil {
packages = append(packages, *p)
@ -118,7 +118,7 @@ func (c *PackageCataloger) catalogEggOrWheel(entry *packageEntry) (*pkg.Package,
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *packageEntry) (*pkg.PythonPackageMetadata, []source.Location, error) {
var sources = []source.Location{entry.Metadata.Location}
metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, entry.Metadata.Contents)
metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.RealPath, entry.Metadata.Contents)
if err != nil {
return nil, nil, err
}

View file

@ -19,11 +19,11 @@ func newPackageEntry(resolver source.FileResolver, metadataLocation source.Locat
// to reconcile the RECORD path to the same layer (or a lower layer). The same is true with the top_level.txt file.
// let's find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
recordPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "RECORD")
recordLocation := resolver.RelativeFileByPath(metadataLocation, recordPath)
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
parentDir := filepath.Dir(metadataLocation.Path)
parentDir := filepath.Dir(metadataLocation.RealPath)
topLevelPath := filepath.Join(parentDir, "top_level.txt")
topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)

View file

@ -43,7 +43,7 @@ func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
pkgs, err = parseRpmDB(resolver, location, dbContentReader)
if err != nil {
return nil, fmt.Errorf("unable to catalog rpmdb package=%+v: %w", location.Path, err)
return nil, fmt.Errorf("unable to catalog rpmdb package=%+v: %w", location.RealPath, err)
}
}
return pkgs, nil

View file

@ -62,7 +62,7 @@ identifyLoop:
content, err := ioutil.ReadAll(contentReader)
if err != nil {
log.Errorf("unable to read %q: %+v", location.Path, err)
log.Errorf("unable to read %q: %+v", location.RealPath, err)
break
}

View file

@ -30,7 +30,7 @@ func TestCycloneDxDirsPresenter(t *testing.T) {
Type: pkg.DebPkg,
FoundBy: "the-cataloger-1",
Locations: []source.Location{
{Path: "/some/path/pkg1"},
{RealPath: "/some/path/pkg1"},
},
Metadata: pkg.DpkgMetadata{
Package: "package1",
@ -44,7 +44,7 @@ func TestCycloneDxDirsPresenter(t *testing.T) {
Type: pkg.DebPkg,
FoundBy: "the-cataloger-2",
Locations: []source.Location{
{Path: "/some/path/pkg1"},
{RealPath: "/some/path/pkg1"},
},
Licenses: []string{
"MIT",

View file

@ -36,7 +36,7 @@ func TestJsonDirsPresenter(t *testing.T) {
Type: pkg.PythonPkg,
FoundBy: "the-cataloger-1",
Locations: []source.Location{
{Path: "/some/path/pkg1"},
{RealPath: "/some/path/pkg1"},
},
Language: pkg.Python,
MetadataType: pkg.PythonPackageMetadataType,
@ -56,7 +56,7 @@ func TestJsonDirsPresenter(t *testing.T) {
Type: pkg.DebPkg,
FoundBy: "the-cataloger-2",
Locations: []source.Location{
{Path: "/some/path/pkg1"},
{RealPath: "/some/path/pkg1"},
},
MetadataType: pkg.DpkgMetadataType,
Metadata: pkg.DpkgMetadata{

View file

@ -117,6 +117,10 @@ func TestAllLayersResolver_FilesByPath(t *testing.T) {
t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), expected.path)
}
if expected.path != "" && string(actual.ref.RealPath) != actual.RealPath {
t.Errorf("we should always prefer real paths over ones with links")
}
entry, err := img.FileCatalog.Get(actual.ref)
if err != nil {
t.Fatalf("failed to get metadata: %+v", err)
@ -221,6 +225,10 @@ func TestAllLayersResolver_FilesByGlob(t *testing.T) {
t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), expected.path)
}
if expected.path != "" && string(actual.ref.RealPath) != actual.RealPath {
t.Errorf("we should always prefer real paths over ones with links")
}
entry, err := img.FileCatalog.Get(actual.ref)
if err != nil {
t.Fatalf("failed to get metadata: %+v", err)

View file

@ -54,20 +54,20 @@ func TestContentRequester(t *testing.T) {
}
for _, entry := range data {
if expected, ok := test.expectedContents[entry.Location.Path]; ok {
if expected, ok := test.expectedContents[entry.Location.RealPath]; ok {
actualBytes, err := ioutil.ReadAll(entry.Contents)
if err != nil {
t.Fatalf("could not read %q: %+v", entry.Location.Path, err)
t.Fatalf("could not read %q: %+v", entry.Location.RealPath, err)
}
for expected != string(actualBytes) {
t.Errorf("mismatched contents for %q", entry.Location.Path)
t.Errorf("mismatched contents for %q", entry.Location.RealPath)
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(expected, string(actualBytes), true)
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
}
continue
}
t.Errorf("could not find %q", entry.Location.Path)
t.Errorf("could not find %q", entry.Location.RealPath)
}
})
}

View file

@ -110,7 +110,7 @@ func (r *DirectoryResolver) RelativeFileByPath(_ Location, path string) *Locatio
func (r DirectoryResolver) MultipleFileContentsByLocation(locations []Location) (map[Location]io.ReadCloser, error) {
refContents := make(map[Location]io.ReadCloser)
for _, location := range locations {
refContents[location] = file.NewDeferredReadCloser(location.Path)
refContents[location] = file.NewDeferredReadCloser(location.RealPath)
}
return refContents, nil
}
@ -118,5 +118,5 @@ func (r DirectoryResolver) MultipleFileContentsByLocation(locations []Location)
// FileContentsByLocation fetches file contents for a single file reference relative to a directory.
// If the path does not exist an error is returned.
func (r DirectoryResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) {
return file.NewDeferredReadCloser(location.Path), nil
return file.NewDeferredReadCloser(location.RealPath), nil
}

View file

@ -80,8 +80,8 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) {
}
for _, actual := range refs {
if actual.Path != c.expected {
t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.expected)
if actual.RealPath != c.expected {
t.Errorf("bad resolve path: '%s'!='%s'", actual.RealPath, c.expected)
}
}
})

View file

@ -106,6 +106,10 @@ func TestImageSquashResolver_FilesByPath(t *testing.T) {
t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), c.resolvePath)
}
if c.resolvePath != "" && string(actual.ref.RealPath) != actual.RealPath {
t.Errorf("we should always prefer real paths over ones with links")
}
entry, err := img.FileCatalog.Get(actual.ref)
if err != nil {
t.Fatalf("failed to get metadata: %+v", err)
@ -208,6 +212,10 @@ func TestImageSquashResolver_FilesByGlob(t *testing.T) {
t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), c.resolvePath)
}
if c.resolvePath != "" && string(actual.ref.RealPath) != actual.RealPath {
t.Errorf("we should always prefer real paths over ones with links")
}
entry, err := img.FileCatalog.Get(actual.ref)
if err != nil {
t.Fatalf("failed to get metadata: %+v", err)

View file

@ -7,9 +7,11 @@ import (
"github.com/anchore/stereoscope/pkg/image"
)
// Location represents a path relative to a particular filesystem.
// Location represents a path relative to a particular filesystem resolved to a specific file.Reference. This struct is used as a key
// in content fetching to uniquely identify a file relative to a request (the VirtualPath).
type Location struct {
Path string `json:"path"` // The string path of the location (e.g. /etc/hosts)
RealPath string `json:"path"` // The path where all path ancestors have no hardlinks / symlinks
VirtualPath string `json:"-"` // The path to the file which may or may not have hardlinks / symlinks
FileSystemID string `json:"layerID,omitempty"` // An ID representing the filesystem. For container images this is a layer digest, directories or root filesystem this is blank.
ref file.Reference // The file reference relative to the stereoscope.FileCatalog that has more information about this location.
}
@ -17,23 +19,25 @@ type Location struct {
// NewLocation creates a new Location representing a path without denoting a filesystem or FileCatalog reference.
func NewLocation(path string) Location {
return Location{
Path: path,
RealPath: path,
}
}
// NewLocationFromImage creates a new Location representing the given path (extracted from the ref) relative to the given image.
func NewLocationFromImage(path string, ref file.Reference, img *image.Image) Location {
func NewLocationFromImage(virtualPath string, ref file.Reference, img *image.Image) Location {
entry, err := img.FileCatalog.Get(ref)
if err != nil {
log.Warnf("unable to find file catalog entry for ref=%+v", ref)
return Location{
Path: path,
ref: ref,
VirtualPath: virtualPath,
RealPath: string(ref.RealPath),
ref: ref,
}
}
return Location{
Path: path,
VirtualPath: virtualPath,
RealPath: string(ref.RealPath),
FileSystemID: entry.Layer.Metadata.Digest,
ref: ref,
}

View file

@ -31,7 +31,7 @@ func NewMockResolverForPaths(paths ...string) *MockResolver {
// HasPath indicates if the given path exists in the underlying source.
func (r MockResolver) HasPath(path string) bool {
for _, l := range r.Locations {
if l.Path == path {
if l.RealPath == path {
return true
}
}
@ -40,7 +40,7 @@ func (r MockResolver) HasPath(path string) bool {
// String returns the string representation of the MockResolver.
func (r MockResolver) String() string {
return fmt.Sprintf("mock:(%s,...)", r.Locations[0].Path)
return fmt.Sprintf("mock:(%s,...)", r.Locations[0].RealPath)
}
// FileContentsByLocation fetches file contents for a single location. If the
@ -48,7 +48,7 @@ func (r MockResolver) String() string {
func (r MockResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) {
for _, l := range r.Locations {
if l == location {
return os.Open(location.Path)
return os.Open(location.RealPath)
}
}
@ -74,7 +74,7 @@ func (r MockResolver) FilesByPath(paths ...string) ([]Location, error) {
var results []Location
for _, p := range paths {
for _, location := range r.Locations {
if p == location.Path {
if p == location.RealPath {
results = append(results, NewLocation(p))
}
}
@ -88,7 +88,7 @@ func (r MockResolver) FilesByGlob(patterns ...string) ([]Location, error) {
var results []Location
for _, pattern := range patterns {
for _, location := range r.Locations {
if file.GlobMatch(pattern, location.Path) {
if file.GlobMatch(pattern, location.RealPath) {
results = append(results, location)
}
}

View file

@ -16,8 +16,8 @@ type Resolver interface {
// ContentResolver knows how to get file content for given file.References
type ContentResolver interface {
FileContentsByLocation(Location) (io.ReadCloser, error)
// TODO: it is possible to be given duplicate locations that will be overridden in the map (key), a subtle problem that could easily be misused.
MultipleFileContentsByLocation([]Location) (map[Location]io.ReadCloser, error)
// TODO: we should consider refactoring to return a set of io.Readers or file.Openers instead of the full contents themselves (allow for optional buffering).
}
// FileResolver knows how to get a Location for given string paths and globs

View file

@ -81,6 +81,12 @@ func TestCatalogFromJSON(t *testing.T) {
e.Metadata = metadata
}
// ignore the virtual path on the location for now
for l := range a.Locations {
a.Locations[l].VirtualPath = ""
e.Locations[l].VirtualPath = ""
}
for _, d := range deep.Equal(a, e) {
// ignore errors for empty collections vs nil for select fields
// TODO: this is brittle, but not dangerously so. We should still find a better way to do this.