update json schema with optional poweruser data shape

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
Alex Goodman 2021-03-18 08:56:00 -04:00
parent 97f0f83544
commit 6a960ec1f3
GPG key ID: 5CB45AE22BAB7EA7
12 changed files with 201 additions and 75 deletions


@@ -6,5 +6,5 @@ const (
// JSONSchemaVersion is the current schema version output by the JSON presenter
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "1.0.3"
JSONSchemaVersion = "1.0.4"
)


@@ -1,10 +1,10 @@
# JSON Schema
This is the JSON schema for output from the JSON presenter (`syft <img> -o json`). The required inputs for defining the JSON schema are as follows:
This is the JSON schema for output from the JSON presenters (`syft packages <img> -o json` and `syft power-user <img>`). The required inputs for defining the JSON schema are as follows:
- the value of `internal.JSONSchemaVersion` that governs the schema filename
- the `Document` struct definition within `syft/presenters/json/document.go` that governs the overall document shape
- the `metadataContainer` struct definition within `schema/json/generate.go` that governs the allowable shapes of `pkg.Package.Metadata`
- the `Document` struct definition within `internal/presenters/poweruser/json_document.go` that governs the overall document shape
- the `artifactMetadataContainer` struct definition within `schema/json/generate.go` that governs the allowable shapes of `pkg.Package.Metadata`
With regard to testing the JSON schema, integration test cases provided by the developer are used as examples to validate that JSON output from Syft is always valid relative to the `schema/json/schema-$VERSION.json` file.
@@ -26,7 +26,7 @@ When adding a new `pkg.*Metadata` that is assigned to the `pkg.Package.Metadata`
are done:
- a new integration test case is added to `test/integration/pkg_cases_test.go` that exercises the new package type with the new metadata
- the new metadata struct is added to the `metadataContainer` struct within `schema/json/generate.go`
- the new metadata struct is added to the `artifactMetadataContainer` struct within `schema/json/generate.go`
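
For illustration, registering a new metadata shape looks like the following sketch (`pkg.FooMetadata` is a made-up type used only as an example, not part of syft):

```go
// schema/json/generate.go (sketch): register the new shape so it is reflected
// into the generated schema
type artifactMetadataContainer struct {
	Apk  pkg.ApkMetadata
	Dpkg pkg.DpkgMetadata
	// ... existing entries ...
	Foo  pkg.FooMetadata // hypothetical new metadata type being added
}
```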
## Generating a New Schema


@@ -6,13 +6,14 @@ import (
"fmt"
"io/ioutil"
"os"
"reflect"
"sort"
"strings"
"github.com/alecthomas/jsonschema"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/presenter/poweruser"
"github.com/anchore/syft/syft/pkg"
jsonPresenter "github.com/anchore/syft/syft/presenter/json"
)
/*
@@ -25,7 +26,7 @@ can be extended to include specific package metadata struct shapes in the future
// This should capture all possible metadata shapes stored in the pkg.Package.Metadata field (an interface{}).
// When a new package metadata definition is created it will need to be manually added here. The variable name does
// not matter as long as it is exported.
type metadataContainer struct {
type artifactMetadataContainer struct {
Apk pkg.ApkMetadata
Dpkg pkg.DpkgMetadata
Gem pkg.GemMetadata
@@ -36,10 +37,23 @@ type metadataContainer struct {
Cargo pkg.CargoPackageMetadata
}
// nolint:funlen
func main() {
metadataSchema := jsonschema.Reflect(&metadataContainer{})
documentSchema := jsonschema.Reflect(&jsonPresenter.Document{})
write(encode(build()))
}
func build() *jsonschema.Schema {
reflector := &jsonschema.Reflector{
AllowAdditionalProperties: true,
TypeNamer: func(r reflect.Type) string {
name := r.Name()
if strings.HasPrefix(name, "JSON") {
name = strings.TrimPrefix(name, "JSON")
}
return name
},
}
documentSchema := reflector.ReflectFromType(reflect.TypeOf(&poweruser.JSONDocument{}))
metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{}))
// TODO: inject source definitions
@@ -47,7 +61,7 @@ func main() {
var metadataNames []string
for name, definition := range metadataSchema.Definitions {
if name == "metadataContainer" {
if name == "artifactMetadataContainer" {
// ignore the definition for the fake container
continue
}
@@ -71,22 +85,30 @@
}
// set the "anyOf" field for Package.Metadata to be a disjunction of several types
documentSchema.Definitions["Package"].Properties.Set("metadata", map[string][]map[string]string{
documentSchema.Definitions["Document"].Properties.Set("artifacts.metadata", map[string][]map[string]string{
"anyOf": metadataTypes,
})
filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
return documentSchema
}
func encode(schema *jsonschema.Schema) []byte {
var newSchemaBuffer = new(bytes.Buffer)
enc := json.NewEncoder(newSchemaBuffer)
// prevent > and < from being escaped in the payload
enc.SetEscapeHTML(false)
enc.SetIndent("", " ")
err := enc.Encode(&documentSchema)
err := enc.Encode(&schema)
if err != nil {
panic(err)
}
return newSchemaBuffer.Bytes()
}
func write(schema []byte) {
filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
if _, err := os.Stat(filename); !os.IsNotExist(err) {
// check if the schema is the same...
existingFh, err := os.Open(filename)
@@ -99,7 +121,7 @@ func main() {
panic(err)
}
if bytes.Equal(existingSchemaBytes, newSchemaBuffer.Bytes()) {
if bytes.Equal(existingSchemaBytes, schema) {
// the generated schema is the same, bail with no error :)
fmt.Println("No change to the existing schema!")
os.Exit(0)
@@ -115,7 +137,7 @@ func main() {
panic(err)
}
_, err = fh.Write(newSchemaBuffer.Bytes())
_, err = fh.Write(schema)
if err != nil {
panic(err)
}
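
As an aside, the schema file written here is what the README's integration tests validate syft output against. A minimal standalone sketch of that style of validation, assuming the third-party github.com/xeipuuv/gojsonschema library (an assumption; syft's own tests may wire this differently):

```go
package main

import (
	"fmt"

	"github.com/xeipuuv/gojsonschema" // assumed library choice, for illustration only
)

func main() {
	// both paths are illustrative
	schema := gojsonschema.NewReferenceLoader("file://schema/json/schema-1.0.4.json")
	document := gojsonschema.NewReferenceLoader("file://syft-output.json")

	result, err := gojsonschema.Validate(schema, document)
	if err != nil {
		panic(err)
	}
	if !result.Valid() {
		for _, desc := range result.Errors() {
			fmt.Printf("schema violation: %s\n", desc)
		}
	}
}
```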


@@ -749,4 +749,4 @@
"type": "object"
}
}
}
}


@@ -36,7 +36,7 @@ var identityFiles = []parseEntry{
}
// Identify parses distro-specific files to determine distro metadata like version and release.
func Identify(resolver source.Resolver) *Distro {
func Identify(resolver source.FileResolver) *Distro {
var distro *Distro
identifyLoop:


@@ -99,7 +99,12 @@ func TestIdentifyDistro(t *testing.T) {
t.Fatalf("unable to produce a new source for testing: %s", test.fixture)
}
d := Identify(s.Resolver)
resolver, err := s.FileResolver(source.SquashedScope)
if err != nil {
t.Fatalf("unable to get resolver: %+v", err)
}
d := Identify(resolver)
if d == nil {
if test.Type == UnknownDistroType {
return


@@ -10,11 +10,11 @@ const (
// AppUpdateAvailable is a partybus event that occurs when an application update is available
AppUpdateAvailable partybus.EventType = "syft-app-update-available"
// CatalogerStarted is a partybus event that occurs when the package cataloging has begun
CatalogerStarted partybus.EventType = "syft-cataloger-started-event"
// PackageCatalogerStarted is a partybus event that occurs when the package cataloging has begun
PackageCatalogerStarted partybus.EventType = "syft-cataloger-started-event"
// CatalogerFinished is a partybus event that occurs when the package cataloging has completed
CatalogerFinished partybus.EventType = "syft-cataloger-finished-event"
// PresenterReady is a partybus event that occurs when an analysis result is ready for final presentation
PresenterReady partybus.EventType = "syft-presenter-ready-event"
// ImportStarted is a partybus event that occurs when an SBOM upload process has begun
ImportStarted partybus.EventType = "syft-import-started-event"
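
For context, a sketch of how the PresenterReady event might be published (the *partybus.Bus receiver and the presenter payload are assumptions based on the parser in the next file, not confirmed by this diff):

```go
package example

import (
	"github.com/wagoodman/go-partybus"

	"github.com/anchore/syft/internal/presenter"
	"github.com/anchore/syft/syft/event"
)

// publishPresenterReady sketches the producer side: the Value payload is a
// presenter.Presenter, matching what ParsePresenterReady expects. This sketch
// assumes it lives inside the syft module (presenter is an internal package).
func publishPresenterReady(bus *partybus.Bus, pres presenter.Presenter) {
	bus.Publish(partybus.Event{
		Type:  event.PresenterReady,
		Value: pres,
	})
}
```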


@@ -6,11 +6,12 @@ package parsers
import (
"fmt"
"github.com/anchore/syft/internal/presenter"
"github.com/wagoodman/go-progress"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/presenter"
"github.com/anchore/syft/syft/pkg/cataloger"
"github.com/wagoodman/go-partybus"
)
@@ -40,7 +41,7 @@ func checkEventType(actual, expected partybus.EventType) error {
}
func ParseCatalogerStarted(e partybus.Event) (*cataloger.Monitor, error) {
if err := checkEventType(e.Type, event.CatalogerStarted); err != nil {
if err := checkEventType(e.Type, event.PackageCatalogerStarted); err != nil {
return nil, err
}
@@ -52,8 +53,8 @@
return &monitor, nil
}
func ParseCatalogerFinished(e partybus.Event) (presenter.Presenter, error) {
if err := checkEventType(e.Type, event.CatalogerFinished); err != nil {
func ParsePresenterReady(e partybus.Event) (presenter.Presenter, error) {
if err := checkEventType(e.Type, event.PresenterReady); err != nil {
return nil, err
}
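
And a consumer-side sketch dispatching on the renamed event types (sub.Events() is assumed from go-partybus; the handling bodies and the Present signature are illustrative):

```go
package example

import (
	"os"

	"github.com/wagoodman/go-partybus"

	"github.com/anchore/syft/syft/event"
	"github.com/anchore/syft/syft/event/parsers"
)

// handleEvents sketches a UI event loop built on the renamed event types.
func handleEvents(sub *partybus.Subscription) error {
	for e := range sub.Events() {
		switch e.Type {
		case event.PackageCatalogerStarted:
			monitor, err := parsers.ParseCatalogerStarted(e)
			if err != nil {
				return err
			}
			_ = monitor // e.g. drive a progress bar from the monitor
		case event.PresenterReady:
			pres, err := parsers.ParsePresenterReady(e)
			if err != nil {
				return err
			}
			return pres.Present(os.Stdout) // Present(io.Writer) signature assumed
		}
	}
	return nil
}
```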

syft/file/digest.go (new file)

@@ -0,0 +1,6 @@
package file
type Digest struct {
Algorithm string `json:"algorithm"`
Value string `json:"value"`
}


@@ -0,0 +1,98 @@
package file
import (
"crypto"
"fmt"
"hash"
"io"
"strings"
"github.com/anchore/syft/syft/source"
)
var supportedHashAlgorithms = make(map[string]crypto.Hash)
type DigestsCataloger struct {
resolver source.FileResolver
hashes []crypto.Hash
}
func init() {
for _, h := range []crypto.Hash{
crypto.MD5,
crypto.SHA1,
crypto.SHA256,
} {
supportedHashAlgorithms[cleanAlgorithmName(h.String())] = h
}
}
func NewDigestsCataloger(resolver source.FileResolver, hashAlgorithms []string) (*DigestsCataloger, error) {
var hashes []crypto.Hash
for _, hashStr := range hashAlgorithms {
name := cleanAlgorithmName(hashStr)
hashObj, ok := supportedHashAlgorithms[name]
if !ok {
return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
}
hashes = append(hashes, hashObj)
}
return &DigestsCataloger{
resolver: resolver,
hashes: hashes,
}, nil
}
func (i *DigestsCataloger) Catalog() (map[source.Location][]Digest, error) {
results := make(map[source.Location][]Digest)
for location := range i.resolver.AllLocations() {
result, err := i.catalogLocation(location)
if err != nil {
return nil, err
}
results[location] = result
}
return results, nil
}
func (i *DigestsCataloger) catalogLocation(location source.Location) ([]Digest, error) {
contentReader, err := i.resolver.FileContentsByLocation(location)
if err != nil {
return nil, err
}
defer contentReader.Close()
// create a set of hasher objects tied together with a single writer to feed content into
hashers := make([]hash.Hash, len(i.hashes))
writers := make([]io.Writer, len(i.hashes))
for idx, hashObj := range i.hashes {
hashers[idx] = hashObj.New()
writers[idx] = hashers[idx]
}
size, err := io.Copy(io.MultiWriter(writers...), contentReader)
if err != nil {
return nil, fmt.Errorf("unable to observe contents of %+v: %+v", location.RealPath, err)
}
result := make([]Digest, len(i.hashes))
if size > 0 {
// only capture digests when there is content. It is important to gate this on SIZE and not on
// FILE TYPE: a tar can be crafted whose header claims a body-less (header-only) file type
// while a body is still present.
for idx, hasher := range hashers {
result[idx] = Digest{
Algorithm: cleanAlgorithmName(i.hashes[idx].String()),
Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
}
}
}
return result, nil
}
func cleanAlgorithmName(name string) string {
lower := strings.ToLower(name)
return strings.Replace(lower, "-", "", -1)
}
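
A usage sketch for the new cataloger, assuming a source.FileResolver is already in hand (algorithm names are normalized by cleanAlgorithmName, so "SHA-256" and "sha256" are equivalent):

```go
package example

import (
	"fmt"

	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/source"
)

// printDigests sketches wiring the digests cataloger to an existing resolver.
func printDigests(resolver source.FileResolver) error {
	cataloger, err := file.NewDigestsCataloger(resolver, []string{"sha256", "md5"})
	if err != nil {
		return err // an unsupported hash algorithm was requested
	}

	digestsByLocation, err := cataloger.Catalog()
	if err != nil {
		return err
	}

	for location, digests := range digestsByLocation {
		for _, d := range digests {
			fmt.Printf("%s  %s:%s\n", location.RealPath, d.Algorithm, d.Value)
		}
	}
	return nil
}
```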


@@ -0,0 +1,28 @@
package file
import (
"github.com/anchore/syft/syft/source"
)
type MetadataCataloger struct {
resolver source.FileResolver
}
func NewMetadataCataloger(resolver source.FileResolver) *MetadataCataloger {
return &MetadataCataloger{
resolver: resolver,
}
}
func (i *MetadataCataloger) Catalog() (map[source.Location]source.FileMetadata, error) {
results := make(map[source.Location]source.FileMetadata)
for location := range i.resolver.AllLocations() {
metadata, err := i.resolver.FileMetadataByLocation(location)
if err != nil {
return nil, err
}
results[location] = metadata
}
return results, nil
}
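
The metadata cataloger follows the same pattern; a brief sketch:

```go
package example

import (
	"fmt"

	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/source"
)

// countFileMetadata sketches the analogous wiring for the metadata cataloger.
func countFileMetadata(resolver source.FileResolver) error {
	cataloger := file.NewMetadataCataloger(resolver)
	metadataByLocation, err := cataloger.Catalog()
	if err != nil {
		return err
	}
	fmt.Printf("captured metadata for %d locations\n", len(metadataByLocation))
	return nil
}
```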


@@ -17,32 +17,29 @@ Similar to the cataloging process, Linux distribution identification is also per
package syft
import (
"encoding/json"
"fmt"
"io"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/distro"
"github.com/anchore/syft/syft/logger"
"github.com/anchore/syft/syft/pkg"
jsonPresenter "github.com/anchore/syft/syft/presenter/json"
"github.com/anchore/syft/syft/pkg/cataloger"
"github.com/anchore/syft/syft/source"
"github.com/wagoodman/go-partybus"
)
// Catalog the given image from a particular perspective (e.g. squashed source, all-layers source). Returns the discovered
// set of packages, the identified Linux distribution, and the source object used to wrap the data source.
func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog, *distro.Distro, error) {
theSource, cleanup, err := source.New(userInput, scope)
defer cleanup()
// CatalogPackages takes an inventory of packages from the given image from a particular perspective
// (e.g. squashed source, all-layers source). Returns the discovered set of packages, the identified Linux
// distribution, and the source object used to wrap the data source.
func CatalogPackages(src source.Source, scope source.Scope) (*pkg.Catalog, *distro.Distro, error) {
resolver, err := src.FileResolver(scope)
if err != nil {
return source.Source{}, nil, nil, err
return nil, nil, fmt.Errorf("unable to determine FileResolver while cataloging packages: %w", err)
}
// find the distro
theDistro := distro.Identify(theSource.Resolver)
theDistro := distro.Identify(resolver)
if theDistro != nil {
log.Infof("identified distro: %s", theDistro.String())
} else {
@@ -51,7 +48,7 @@ func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog,
// conditionally use the correct set of loggers based on the input type (container image or directory)
var catalogers []cataloger.Cataloger
switch theSource.Metadata.Scheme {
switch src.Metadata.Scheme {
case source.ImageScheme:
log.Info("cataloging image")
catalogers = cataloger.ImageCatalogers()
@@ -59,46 +56,15 @@ func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog,
log.Info("cataloging directory")
catalogers = cataloger.DirectoryCatalogers()
default:
return source.Source{}, nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", theSource.Metadata.Scheme)
return nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", src.Metadata.Scheme)
}
catalog, err := cataloger.Catalog(theSource.Resolver, theDistro, catalogers...)
catalog, err := cataloger.Catalog(resolver, theDistro, catalogers...)
if err != nil {
return source.Source{}, nil, nil, err
return nil, nil, err
}
return theSource, catalog, theDistro, nil
}
// CatalogFromJSON takes an existing syft report and generates native syft objects.
func CatalogFromJSON(reader io.Reader) (source.Metadata, *pkg.Catalog, *distro.Distro, error) {
var doc jsonPresenter.Document
var err error
decoder := json.NewDecoder(reader)
if err := decoder.Decode(&doc); err != nil {
return source.Metadata{}, nil, nil, err
}
var pkgs = make([]pkg.Package, len(doc.Artifacts))
for i, a := range doc.Artifacts {
pkgs[i], err = a.ToPackage()
if err != nil {
return source.Metadata{}, nil, nil, err
}
}
catalog := pkg.NewCatalog(pkgs...)
var theDistro *distro.Distro
if doc.Distro.Name != "" {
d, err := distro.NewDistro(distro.Type(doc.Distro.Name), doc.Distro.Version, doc.Distro.IDLike)
if err != nil {
return source.Metadata{}, nil, nil, err
}
theDistro = &d
}
return doc.Source.ToSourceMetadata(), catalog, theDistro, nil
return catalog, theDistro, nil
}
// SetLogger sets the logger object used for all syft logging calls.
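
Caller-side, the refactored flow looks roughly like this sketch (source.New's signature is taken from the removed code above and may itself have changed elsewhere in this commit; PackageCount is assumed on pkg.Catalog):

```go
package example

import (
	"fmt"

	"github.com/anchore/syft/syft"
	"github.com/anchore/syft/syft/source"
)

// catalogImage sketches the new division of labor: the caller constructs the
// source itself and hands it to CatalogPackages instead of passing a raw user
// input string into the library.
func catalogImage(userInput string) error {
	src, cleanup, err := source.New(userInput, source.SquashedScope)
	if err != nil {
		return err
	}
	defer cleanup()

	catalog, theDistro, err := syft.CatalogPackages(src, source.SquashedScope)
	if err != nil {
		return err
	}

	fmt.Printf("discovered %d packages (distro: %v)\n", catalog.PackageCount(), theDistro)
	return nil
}
```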