feat: catalog python files for installed-files.txt file metadata (#1217)

Co-authored-by: houdini91 <mdstrauss91@gmail.com>
This commit is contained in:
Christopher Angelo Phillips 2022-09-19 16:08:02 -04:00 committed by GitHub
parent c2005fad8d
commit 04d288b364
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 117 additions and 2 deletions

View file

@ -8,6 +8,7 @@ import (
"path/filepath"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
@ -91,13 +92,44 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad
return p, nil
}
// fetchInstalledFiles finds a corresponding installed-files.txt file for the given python package metadata file and
// returns the set of file records contained, along with the location of the installed-files.txt file that was used.
//
// If no installed-files.txt can be resolved next to the metadata file, no files and no sources are returned.
// A failure to open the file contents is returned as an error. A failure to parse the contents is only logged as a
// warning: the installed-files.txt location is still reported as a source, but no file records are returned.
func (c *PackageCataloger) fetchInstalledFiles(resolver source.FileResolver, metadataLocation source.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
	// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
	// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
	// to reconcile the installed-files.txt path to the same layer (or the next adjacent lower layer).

	// find the installed-files.txt file relative to the directory where the METADATA file resides (in path AND layer structure)
	installedFilesPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "installed-files.txt")
	installedFilesRef := resolver.RelativeFileByPath(metadataLocation, installedFilesPath)
	if installedFilesRef == nil {
		// nothing to catalog: the package simply has no installed-files.txt
		return files, sources, nil
	}

	sources = append(sources, *installedFilesRef)

	installedFilesContents, err := resolver.FileContentsByLocation(*installedFilesRef)
	if err != nil {
		return nil, nil, err
	}
	defer internal.CloseAndLogError(installedFilesContents, installedFilesPath)

	// parse the installed-files contents
	installedFiles, err := parseInstalledFiles(installedFilesContents, metadataLocation.RealPath, sitePackagesRootPath)
	if err != nil {
		// note: %w is only meaningful to fmt.Errorf; a log format string must use %v to render the error
		log.Warnf("unable to parse installed-files.txt for python package=%+v: %v", metadataLocation.RealPath, err)
		return files, sources, nil
	}

	files = append(files, installedFiles...)
	return files, sources, nil
}
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
// find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
recordPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "RECORD")
recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath)
@ -206,6 +238,13 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv
if err != nil {
return nil, nil, err
}
if len(r) == 0 {
r, s, err = c.fetchInstalledFiles(resolver, metadataLocation, metadata.SitePackagesRootPath)
if err != nil {
return nil, nil, err
}
}
sources = append(sources, s...)
metadata.Files = r

View file

@ -1,9 +1,11 @@
package python
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"path/filepath"
"strings"
"github.com/anchore/syft/internal/log"
@ -59,3 +61,34 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {
return records, nil
}
// parseInstalledFiles reads an installed-files.txt listing (one path per line) from the given reader and returns
// one file record per non-empty line.
//
// When both location and sitePackagesRootPath are non-empty, each entry is joined onto the directory containing
// location and then rewritten relative to sitePackagesRootPath. Otherwise entries are recorded as they appear in
// the listing.
//
// Line terminators ("\n" or "\r\n") are stripped before any path manipulation, blank lines are skipped, and a final
// line that is not newline-terminated is still recorded. A read failure or a failure to relativize a path is
// returned as an error.
func parseInstalledFiles(reader io.Reader, location, sitePackagesRootPath string) ([]pkg.PythonFileRecord, error) {
	var installedFiles []pkg.PythonFileRecord
	r := bufio.NewReader(reader)
	rewritePaths := location != "" && sitePackagesRootPath != ""

	for {
		line, err := r.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, fmt.Errorf("unable to read python installed-files file: %w", err)
		}
		// ReadString returns whatever was read before EOF, so the last line must still be processed
		// even when the file does not end with a newline.
		atEOF := err == io.EOF

		// strip the terminator up front so it never becomes part of a path element during Join/Rel
		entry := strings.TrimRight(line, "\r\n")
		if entry != "" {
			if rewritePaths {
				joinedPath := filepath.Join(filepath.Dir(location), entry)
				entry, err = filepath.Rel(sitePackagesRootPath, joinedPath)
				if err != nil {
					return nil, err
				}
			}
			installedFiles = append(installedFiles, pkg.PythonFileRecord{Path: entry})
		}

		if atEOF {
			break
		}
	}

	return installedFiles, nil
}

View file

@ -55,5 +55,42 @@ func TestParseWheelEggRecord(t *testing.T) {
}
})
}
}
// TestParseInstalledFiles checks that parseInstalledFiles, when called without a metadata location or site-packages
// root, returns one record per line of the fixture with the paths exactly as written in the file.
func TestParseInstalledFiles(t *testing.T) {
	tests := []struct {
		Fixture          string
		ExpectedMetadata []pkg.PythonFileRecord
	}{
		{
			Fixture: "test-fixtures/installed-files/installed-files.txt",
			ExpectedMetadata: []pkg.PythonFileRecord{
				{Path: "../__pycache__/dicttoxml.cpython-36.pyc"},
				{Path: "../dicttoxml.py"},
				{Path: "PKG-INFO"},
				{Path: "SOURCES.txt"},
				{Path: "dependency_links.txt"},
				{Path: "top_level.txt"},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.Fixture, func(t *testing.T) {
			fixture, err := os.Open(test.Fixture)
			if err != nil {
				t.Fatalf("failed to open fixture: %+v", err)
			}
			// release the file handle once the subtest finishes; the fixture is only read, so a close error is not actionable
			defer fixture.Close()

			// empty location/root: paths are expected to be passed through without being rewritten
			actual, err := parseInstalledFiles(fixture, "", "")
			if err != nil {
				t.Fatalf("failed to parse: %+v", err)
			}

			for _, d := range deep.Equal(actual, test.ExpectedMetadata) {
				t.Errorf("diff: %+v", d)
			}
		})
	}
}

View file

@ -0,0 +1,6 @@
../__pycache__/dicttoxml.cpython-36.pyc
../dicttoxml.py
PKG-INFO
SOURCES.txt
dependency_links.txt
top_level.txt