add mem profile option and refactor python cataloger for batch requests

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-12-11 16:14:00 -05:00
parent be5917a058
commit 82c8a8e17b
No known key found for this signature in database
GPG key ID: 5CB45AE22BAB7EA7
6 changed files with 141 additions and 89 deletions

2
.gitignore vendored
View file

@ -33,3 +33,5 @@ coverage.txt
# macOS Finder metadata
.DS_STORE
*.profile

View file

@ -5,9 +5,10 @@ import (
"fmt"
"io/ioutil"
"os"
"runtime/pprof"
"strings"
"github.com/pkg/profile"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/anchore"
"github.com/anchore/syft/internal/bus"
@ -55,24 +56,19 @@ You can also explicitly specify the scheme to use:
os.Exit(1)
}
if appConfig.Dev.ProfileCPU && appConfig.Dev.ProfileMem {
log.Errorf("cannot profile CPU and memory simultaneously")
os.Exit(1)
}
if appConfig.Dev.ProfileCPU {
f, err := os.Create("cpu.profile")
if err != nil {
log.Errorf("unable to create CPU profile: %+v", err)
} else {
err := pprof.StartCPUProfile(f)
if err != nil {
log.Errorf("unable to start CPU profile: %+v", err)
}
}
defer profile.Start(profile.CPUProfile).Stop()
} else if appConfig.Dev.ProfileMem {
defer profile.Start(profile.MemProfile).Stop()
}
err := doRunCmd(cmd, args)
if appConfig.Dev.ProfileCPU {
pprof.StopCPUProfile()
}
if err != nil {
log.Errorf(err.Error())
os.Exit(1)

1
go.mod
View file

@ -24,6 +24,7 @@ require (
github.com/olekukonko/tablewriter v0.0.4
github.com/package-url/packageurl-go v0.1.0
github.com/pelletier/go-toml v1.8.0
github.com/pkg/profile v1.5.0
github.com/scylladb/go-set v1.0.2
github.com/sergi/go-diff v1.1.0
github.com/sirupsen/logrus v1.6.0

13
go.sum
View file

@ -126,7 +126,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/anchore/client-go v0.0.0-20201120223920-9f812673f4d6/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk=
github.com/anchore/client-go v0.0.0-20201210022459-59e7a0749c74 h1:9kkKTIyXJC+/syUcY6KWxFoJZJ+GWwrIscF+gBY067k=
github.com/anchore/client-go v0.0.0-20201210022459-59e7a0749c74/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk=
github.com/anchore/go-rpmdb v0.0.0-20201106153645-0043963c2e12 h1:xbeIbn5F52JVx3RUIajxCj8b0y+9lywspql4sFhcxWQ=
@ -135,14 +134,6 @@ github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0v
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ=
github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods=
github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E=
github.com/anchore/stereoscope v0.0.0-20201106140100-12e75c48f409 h1:xKSpDRjmYrEFrdMeDh4AuSUAFc99pdro6YFBKxy2um0=
github.com/anchore/stereoscope v0.0.0-20201106140100-12e75c48f409/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E=
github.com/anchore/stereoscope v0.0.0-20201130153727-b3f1fad856b0 h1:wa0hdnvBeCpI+rmzDbPG7k5SKlsGkot7aZ8Az1i/vws=
github.com/anchore/stereoscope v0.0.0-20201130153727-b3f1fad856b0/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E=
github.com/anchore/stereoscope v0.0.0-20201203153145-3f9a05a624d7 h1:G3LnRqHL/IIeQZTAMtDOJNYfSYsXLNCZX4DCiS0R0FY=
github.com/anchore/stereoscope v0.0.0-20201203153145-3f9a05a624d7/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E=
github.com/anchore/stereoscope v0.0.0-20201203222654-09e79bf5fef4 h1:XDuCqOWKyQQlKhd9kEDnyKbvSCwShKBDCsyBmD/ALYs=
github.com/anchore/stereoscope v0.0.0-20201203222654-09e79bf5fef4/go.mod h1:/dHAFjYflH/1tzhdHAcnMCjprMch+YzHJKi59m/1KCM=
github.com/anchore/stereoscope v0.0.0-20201210022249-091f9bddb42e h1:vHUqHTvH9/oxdDDh1fxS9Ls9gWGytKO7XbbzcQ9MBwI=
github.com/anchore/stereoscope v0.0.0-20201210022249-091f9bddb42e/go.mod h1:/dHAFjYflH/1tzhdHAcnMCjprMch+YzHJKi59m/1KCM=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
@ -274,6 +265,7 @@ github.com/facebookincubator/nvdtools v0.1.4/go.mod h1:0/FIVnSEl9YHXLq3tKBPpKaI0
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA=
github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI=
github.com/fortytw2/leaktest v1.2.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
@ -700,6 +692,8 @@ github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.5.0 h1:042Buzk+NhDI+DeSAA62RwJL8VAuZUMQZUjCsRz1Mug=
github.com/pkg/profile v1.5.0/go.mod h1:qBsxPvzyUincmltOk6iyRVxHYg4adc0OFOv72ZdLa18=
github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@ -824,6 +818,7 @@ github.com/tetafro/godot v0.4.2/go.mod h1:/7NLHhv08H1+8DNj0MElpAACw1ajsCuf3TKNQx
github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk=
github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94 h1:ig99OeTyDwQWhPe2iw9lwfQVF1KB3Q4fpP3X7/2VBG8=
github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk=
github.com/tj/assert v0.0.0-20171129193455-018094318fb0 h1:Rw8kxzWo1mr6FSaYXjQELRe88y2KdfynXdnK72rdjtA=
github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0=
github.com/tj/go-elastic v0.0.0-20171221160941-36157cbbebc2/go.mod h1:WjeM0Oo1eNAjXGDx2yma7uG2XoyRZTq1uv3M/o7imD0=
github.com/tj/go-kinesis v0.0.0-20171128231115-08b17f58cb1b/go.mod h1:/yhzCV0xPfx6jb1bBgRFjl5lytqVqZXEaeqWP8lTEao=

View file

@ -56,6 +56,7 @@ type anchore struct {
// Development holds developer-facing options that are decoded from configuration via the
// mapstructure tags on each field.
// NOTE(review): presumably this is the type behind appConfig.Dev, where the two flags select
// CPU or memory profiling for a run — confirm against the command entry point.
type Development struct {
// ProfileCPU is decoded from the "profile-cpu" configuration key.
ProfileCPU bool `mapstructure:"profile-cpu"`
// ProfileMem is decoded from the "profile-mem" configuration key.
ProfileMem bool `mapstructure:"profile-mem"`
}
// LoadApplicationConfig populates the given viper object with application configuration discovered on disk
@ -222,4 +223,5 @@ func setNonCliDefaultValues(v *viper.Viper) {
v.SetDefault("log.structured", false)
v.SetDefault("check-for-app-update", true)
v.SetDefault("dev.profile-cpu", false)
v.SetDefault("dev.profile-mem", false)
}

View file

@ -43,22 +43,101 @@ func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, err
fileMatches = append(fileMatches, matches...)
}
request, entries := filesOfInterest(resolver, fileMatches)
if err := getContents(resolver, request); err != nil {
return nil, err
}
var pkgs []pkg.Package
for _, location := range fileMatches {
p, err := c.catalogEggOrWheel(resolver, location)
for _, entry := range entries {
p, err := c.catalogEggOrWheel(entry)
if err != nil {
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.Path, err)
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err)
}
if p != nil {
pkgs = append(pkgs, *p)
}
}
return pkgs, nil
}
// FileData pairs a file location with the contents read from that location.
type FileData struct {
// Location identifies the file whose contents are wanted.
Location source.Location
// Contents holds the file contents. It is the empty string when the value is first built by
// filesOfInterest and is filled in afterwards by getContents.
Contents string
}
// pythonEntry groups the files that together describe a single installed python package (egg or wheel).
type pythonEntry struct {
// Metadata is the primary metadata file for the package; it is always set.
Metadata FileData
// FileRecord is the RECORD file found alongside the metadata file, or nil if none could be resolved.
FileRecord *FileData
// TopPackage is the top_level.txt file found alongside the metadata file, or nil if none could be resolved.
TopPackage *FileData
}
// filesOfInterest builds one pythonEntry per metadata location and, alongside it, a lookup of every file whose
// contents are needed to catalog those entries.
//
// For each metadata file the sibling RECORD and top_level.txt files are resolved relative to the metadata
// location. A sibling that cannot be resolved leaves the matching pythonEntry field nil and adds nothing to the
// lookup.
//
// The values of the returned map point at the FileData held by the returned entries, so writing Contents through
// the map fills in the entries in place.
func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Location) (map[source.Location]*FileData, []*pythonEntry) {
	wanted := make(map[source.Location]*FileData)
	var found []*pythonEntry

	for _, metaLoc := range metadataLocations {
		// both companion files live in the same directory as the metadata file
		siteDir := filepath.Dir(metaLoc.Path)

		// the metadata location may refer to a directory or to an image. within an image the metadata file may be
		// present in multiple layers, so RECORD is looked up relative to the metadata location (path AND layer
		// structure) rather than by path alone.
		recordLoc := resolver.RelativeFileByPath(metaLoc, filepath.Join(siteDir, "RECORD"))

		// top_level.txt names the python top-level packages this package installs into site-packages
		topLevelLoc := resolver.RelativeFileByPath(metaLoc, filepath.Join(siteDir, "top_level.txt"))

		current := &pythonEntry{Metadata: FileData{Location: metaLoc}}
		wanted[metaLoc] = &current.Metadata

		if recordLoc != nil {
			current.FileRecord = &FileData{Location: *recordLoc}
			wanted[*recordLoc] = current.FileRecord
		}

		if topLevelLoc != nil {
			current.TopPackage = &FileData{Location: *topLevelLoc}
			wanted[*topLevelLoc] = current.TopPackage
		}

		found = append(found, current)
	}

	return wanted, found
}
// getContents fetches the contents of every location named in request with one call to
// resolver.MultipleFileContentsByLocation and stores each result in the Contents field of the matching FileData.
//
// request is updated in place. A requested location that is missing from the response keeps whatever Contents
// it already had. A location in the response that was never requested (or whose request value is nil) is
// ignored, since indexing the map for it yields a nil *FileData that must not be dereferenced.
//
// Any error reported by the resolver is returned unchanged and request is left untouched.
func getContents(resolver source.ContentResolver, request map[source.Location]*FileData) error {
	locations := make([]source.Location, 0, len(request))
	for l := range request {
		locations = append(locations, l)
	}

	response, err := resolver.MultipleFileContentsByLocation(locations)
	if err != nil {
		return err
	}

	for l, contents := range response {
		// a missing key yields the zero value (nil) for a pointer-valued map; guard against it
		if data := request[l]; data != nil {
			data.Contents = contents
		}
	}

	return nil
}
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
func (c *PackageCataloger) catalogEggOrWheel(resolver source.Resolver, metadataLocation source.Location) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataLocation)
func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(entry)
if err != nil {
return nil, err
}
@ -81,26 +160,45 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.Resolver, metadataL
}, nil
}
// assembleEggOrWheelMetadata combines the already-fetched files of a python package entry into a single metadata
// object and reports the locations that metadata was derived from.
//
// The primary metadata file is parsed first, then the RECORD file listing and the top_level.txt package names
// are attached. The returned locations start with the metadata file, followed by whatever locations the RECORD
// and top_level.txt helpers report. The first error from the parser or either helper is returned as-is, with
// nil results.
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.PythonPackageMetadata, []source.Location, error) {
	primary := &entry.Metadata
	origins := []source.Location{primary.Location}

	parsed, err := parseWheelOrEggMetadata(primary.Location.Path, strings.NewReader(primary.Contents))
	if err != nil {
		return nil, nil, err
	}

	// python files recorded for this wheel/egg installation
	fileRecords, recordOrigins, err := c.fetchRecordFiles(entry.FileRecord)
	if err != nil {
		return nil, nil, err
	}
	origins = append(origins, recordOrigins...)
	parsed.Files = fileRecords

	// top-level package names provided by this wheel/egg installation
	topLevelNames, topLevelOrigins, err := c.fetchTopLevelPackages(entry.TopPackage)
	if err != nil {
		return nil, nil, err
	}
	origins = append(origins, topLevelOrigins...)
	parsed.TopLevelPackages = topLevelNames

	return &parsed, origins, nil
}
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
func (c *PackageCataloger) fetchRecordFiles(resolver source.Resolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath)
if recordRef != nil {
sources = append(sources, *recordRef)
recordContents, err := resolver.FileContentsByLocation(*recordRef)
if err != nil {
return nil, nil, err
}
if entry != nil {
sources = append(sources, entry.Location)
// parse the record contents
records, err := parseWheelOrEggRecord(strings.NewReader(recordContents))
records, err := parseWheelOrEggRecord(strings.NewReader(entry.Contents))
if err != nil {
return nil, nil, err
}
@ -111,25 +209,16 @@ func (c *PackageCataloger) fetchRecordFiles(resolver source.Resolver, metadataLo
}
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
func (c *PackageCataloger) fetchTopLevelPackages(resolver source.Resolver, metadataLocation source.Location) (pkgs []string, sources []source.Location, err error) {
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
parentDir := filepath.Dir(metadataLocation.Path)
topLevelPath := filepath.Join(parentDir, "top_level.txt")
topLevelRef := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
if topLevelRef == nil {
log.Warnf("missing python package top_level.txt (package=%q)", metadataLocation.Path)
func (c *PackageCataloger) fetchTopLevelPackages(entry *FileData) (pkgs []string, sources []source.Location, err error) {
if entry == nil {
// TODO
log.Warnf("missing python package top_level.txt (package=!!)")
return nil, nil, nil
}
sources = append(sources, *topLevelRef)
sources = append(sources, entry.Location)
topLevelContents, err := resolver.FileContentsByLocation(*topLevelRef)
if err != nil {
return nil, nil, err
}
scanner := bufio.NewScanner(strings.NewReader(topLevelContents))
scanner := bufio.NewScanner(strings.NewReader(entry.Contents))
for scanner.Scan() {
pkgs = append(pkgs, scanner.Text())
}
@ -140,36 +229,3 @@ func (c *PackageCataloger) fetchTopLevelPackages(resolver source.Resolver, metad
return pkgs, sources, nil
}
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and
// returns a single metadata object as well as a list of files where the metadata was derived from.
//
// The metadata file contents are read with resolver.FileContentsByLocation. An error from the resolver, from the
// metadata parser, or from either fetch helper is returned unchanged together with nil results.
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.Resolver, metadataLocation source.Location) (*pkg.PythonPackageMetadata, []source.Location, error) {
// the metadata file itself is always the first reported source
var sources = []source.Location{metadataLocation}
metadataContents, err := resolver.FileContentsByLocation(metadataLocation)
if err != nil {
return nil, nil, err
}
metadata, err := parseWheelOrEggMetadata(metadataLocation.Path, strings.NewReader(metadataContents))
if err != nil {
return nil, nil, err
}
// attach any python files found for the given wheel/egg installation
r, s, err := c.fetchRecordFiles(resolver, metadataLocation)
if err != nil {
return nil, nil, err
}
sources = append(sources, s...)
metadata.Files = r
// attach any top-level package names found for the given wheel/egg installation
p, s, err := c.fetchTopLevelPackages(resolver, metadataLocation)
if err != nil {
return nil, nil, err
}
sources = append(sources, s...)
metadata.TopLevelPackages = p
return &metadata, sources, nil
}