mirror of https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-14 00:47:21 +00:00
0024b6ce77
* feat: support docker image history scanning
* refactor: collapse error handling into return (style suggestion from review feedback)
* fix: associate layers with history entries. Where possible, add the associated layer to the history entry record. This may help tracing any issues discovered. This also changes the entry reference format to `image-metadata:history:%d:created-by`, which _may_ be more self-explanatory.
446 lines
12 KiB
Go
package docker

import (
	"archive/tar"
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"strings"

	"github.com/google/go-containerregistry/pkg/authn"
	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/remote"
	"github.com/google/go-containerregistry/pkg/v1/tarball"
	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/anypb"

	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
	"github.com/trufflesecurity/trufflehog/v3/pkg/context"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

const SourceType = sourcespb.SourceType_SOURCE_TYPE_DOCKER

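// Source scans Docker images for secrets. It emits chunks for both the
// image configuration history and the contents of every layer.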
type Source struct {
	name        string
	sourceId    sources.SourceID
	jobId       sources.JobID
	verify      bool
	concurrency int
	conn        sourcespb.Docker
	sources.Progress
	sources.CommonSourceUnitUnmarshaller
}

// Ensure the Source satisfies the interfaces at compile time.
var _ sources.Source = (*Source)(nil)
var _ sources.SourceUnitUnmarshaller = (*Source)(nil)

// Type returns the type of source.
// It is used for matching source types in configuration and job input.
func (s *Source) Type() sourcespb.SourceType {
	return SourceType
}

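// SourceID returns the ID of this source.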
func (s *Source) SourceID() sources.SourceID {
	return s.sourceId
}

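// JobID returns the ID of the job this source is associated with.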
func (s *Source) JobID() sources.JobID {
	return s.jobId
}

// Init initializes the source.
func (s *Source) Init(_ context.Context, name string, jobId sources.JobID, sourceId sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
	s.name = name
	s.sourceId = sourceId
	s.jobId = jobId
	s.verify = verify
	s.concurrency = concurrency

	// Reset metrics for this source at initialization time.
	dockerImagesScanned.WithLabelValues(s.name).Set(0)
	dockerLayersScanned.WithLabelValues(s.name).Set(0)

	if err := anypb.UnmarshalTo(connection, &s.conn, proto.UnmarshalOptions{}); err != nil {
		return fmt.Errorf("error unmarshalling connection: %w", err)
	}

	return nil
}

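// imageInfo pairs a parsed image with the base name and tag it was requested by.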
type imageInfo struct {
	image v1.Image
	base  string
	tag   string
}

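// historyEntryInfo describes a single entry in the image configuration
// history, along with the digest of its associated layer, if any.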
type historyEntryInfo struct {
	index       int
	entry       v1.History
	layerDigest string
	base        string
	tag         string
}

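// layerInfo identifies a single layer of an image by digest.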
type layerInfo struct {
	digest v1.Hash
	base   string
	tag    string
}

// Chunks emits data over a channel that is decoded and scanned for secrets.
func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, _ ...sources.ChunkingTarget) error {
	ctx = context.WithValues(ctx, "source_type", s.Type(), "source_name", s.name)

	workers := new(errgroup.Group)
	workers.SetLimit(s.concurrency)

	scanErrs := sources.NewScanErrors()
	for _, image := range s.conn.GetImages() {
		image := image
		workers.Go(func() error {
			if common.IsDone(ctx) {
				return nil
			}

			imgInfo, err := s.processImage(ctx, image)
			if err != nil {
				scanErrs.Add(err)
				return nil
			}

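			// Declare a fresh ctx here (rather than reassigning the captured one)
			// so concurrent workers do not race on the shared variable.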
ctx = context.WithValues(ctx, "image", imgInfo.base, "tag", imgInfo.tag)

			ctx.Logger().V(2).Info("scanning image history")

			historyEntries, err := getHistoryEntries(ctx, imgInfo)
			if err != nil {
				scanErrs.Add(err)
				return nil
			}

			for _, historyEntry := range historyEntries {
				if err := s.processHistoryEntry(ctx, historyEntry, chunksChan); err != nil {
					scanErrs.Add(err)
					return nil
				}
				dockerHistoryEntriesScanned.WithLabelValues(s.name).Inc()
			}

			ctx.Logger().V(2).Info("scanning image layers")

			layers, err := imgInfo.image.Layers()
			if err != nil {
				scanErrs.Add(err)
				return nil
			}

			for _, layer := range layers {
				if err := s.processLayer(ctx, layer, imgInfo, chunksChan); err != nil {
					scanErrs.Add(err)
					return nil
				}
				dockerLayersScanned.WithLabelValues(s.name).Inc()
			}

			dockerImagesScanned.WithLabelValues(s.name).Inc()

			return nil
		})
	}
	_ = workers.Wait()
	if scanErrs.Count() > 0 {
		ctx.Logger().V(2).Info("scan errors", "errors", scanErrs.String())
	}

	return nil
}

// processImage processes an individual image and prepares it for further processing.
func (s *Source) processImage(ctx context.Context, image string) (imageInfo, error) {
	var (
		imgInfo   imageInfo
		hasDigest bool
		imageName name.Reference
	)

	remoteOpts, err := s.remoteOpts()
	if err != nil {
		return imgInfo, err
	}

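	// Images prefixed with file:// are loaded from a local tarball on disk;
	// anything else is fetched from a remote registry.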
	const filePrefix = "file://"
	if strings.HasPrefix(image, filePrefix) {
		image = strings.TrimPrefix(image, filePrefix)
		imgInfo.base = image
		imgInfo.image, err = tarball.ImageFromPath(image, nil)
		if err != nil {
			return imgInfo, err
		}
	} else {
		imgInfo.base, imgInfo.tag, hasDigest = baseAndTagFromImage(image)

		if hasDigest {
			imageName, err = name.NewDigest(image)
		} else {
			imageName, err = name.NewTag(image)
		}
		if err != nil {
			return imgInfo, err
		}

		imgInfo.image, err = remote.Image(imageName, remoteOpts...)
		if err != nil {
			return imgInfo, err
		}
	}

	ctx.Logger().WithValues("image", imgInfo.base, "tag", imgInfo.tag).V(2).Info("scanning image")

	return imgInfo, nil
}

// getHistoryEntries collates an image's configuration history together with the
// corresponding layer digests for any non-empty layers.
func getHistoryEntries(ctx context.Context, imgInfo imageInfo) ([]historyEntryInfo, error) {
	config, err := imgInfo.image.ConfigFile()
	if err != nil {
		return nil, err
	}

	layers, err := imgInfo.image.Layers()
	if err != nil {
		return nil, err
	}

	history := config.History
	entries := make([]historyEntryInfo, len(history))

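	// History entries marked as empty layers (e.g. ENV or LABEL instructions)
	// do not consume a layer, so a separate layerIndex tracks the next layer
	// that has not yet been matched to an entry.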
	layerIndex := 0
	for historyIndex, entry := range history {
		e := historyEntryInfo{
			base:  imgInfo.base,
			tag:   imgInfo.tag,
			entry: entry,
			index: historyIndex,
		}

		// Associate with a layer if possible -- failing to do this will not affect
		// the scan, just remove some traceability.
		if !entry.EmptyLayer {
			if layerIndex < len(layers) {
				digest, err := layers[layerIndex].Digest()

				if err == nil {
					e.layerDigest = digest.String()
				} else {
					ctx.Logger().V(2).Error(err, "cannot associate layer with history entry: layer digest failed",
						"layerIndex", layerIndex, "historyIndex", historyIndex)
				}
			} else {
				ctx.Logger().V(2).Info("cannot associate layer with history entry: no correlated layer exists at this index",
					"layerIndex", layerIndex, "historyIndex", historyIndex)
			}

			layerIndex++
		}

		entries[historyIndex] = e
	}

	return entries, nil
}

// processHistoryEntry processes a history entry from the image configuration metadata.
func (s *Source) processHistoryEntry(ctx context.Context, historyInfo historyEntryInfo, chunksChan chan *sources.Chunk) error {
	// Make up an identifier for this entry that is moderately sensible. There is
	// no file name to use here, so the path tries to be a little descriptive.
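	// For example, the fourth history entry is reported as
	// "image-metadata:history:3:created-by".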
	entryPath := fmt.Sprintf("image-metadata:history:%d:created-by", historyInfo.index)

	chunk := &sources.Chunk{
		SourceType: s.Type(),
		SourceName: s.name,
		SourceID:   s.SourceID(),
		SourceMetadata: &source_metadatapb.MetaData{
			Data: &source_metadatapb.MetaData_Docker{
				Docker: &source_metadatapb.Docker{
					File:  entryPath,
					Image: historyInfo.base,
					Tag:   historyInfo.tag,
					Layer: historyInfo.layerDigest,
				},
			},
		},
		Verify: s.verify,
		Data:   []byte(historyInfo.entry.CreatedBy),
	}

	ctx.Logger().V(2).Info("scanning image history entry", "index", historyInfo.index, "layer", historyInfo.layerDigest)

	return common.CancellableWrite(ctx, chunksChan, chunk)
}

// processLayer processes an individual layer of an image.
func (s *Source) processLayer(ctx context.Context, layer v1.Layer, imgInfo imageInfo, chunksChan chan *sources.Chunk) error {
	layerInfo := layerInfo{
		base: imgInfo.base,
		tag:  imgInfo.tag,
	}

	var err error
	layerInfo.digest, err = layer.Digest()
	if err != nil {
		return err
	}

	ctx.Logger().WithValues("layer", layerInfo.digest.String()).V(2).Info("scanning layer")

	rc, err := layer.Compressed()
	if err != nil {
		return err
	}
	defer rc.Close()

	gzipReader, err := gzip.NewReader(rc)
	if err != nil {
		return err
	}
	defer gzipReader.Close()

	tarReader := tar.NewReader(gzipReader)
	for {
		header, err := tarReader.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return err
		}

		info := chunkProcessingInfo{size: header.Size, name: header.Name, reader: tarReader, layer: layerInfo}
		if err := s.processChunk(ctx, info, chunksChan); err != nil {
			return err
		}
	}

	return nil
}

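// chunkProcessingInfo carries everything needed to chunk a single file
// extracted from a layer tarball.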
type chunkProcessingInfo struct {
	size   int64
	name   string
	reader io.Reader
	layer  layerInfo
}

// processChunk processes an individual chunk of a layer.
func (s *Source) processChunk(ctx context.Context, info chunkProcessingInfo, chunksChan chan *sources.Chunk) error {
	const filesizeLimitBytes int64 = 50 * 1024 * 1024 // 50MB
	if info.size > filesizeLimitBytes {
		ctx.Logger().V(4).Info("skipping large file", "file", info.name, "size", info.size)
		return nil
	}

	chunkReader := sources.NewChunkReader()
	chunkResChan := chunkReader(ctx, info.reader)

	for data := range chunkResChan {
		if err := data.Error(); err != nil {
			ctx.Logger().Error(err, "error reading chunk")
			continue
		}

		chunk := &sources.Chunk{
			SourceType: s.Type(),
			SourceName: s.name,
			SourceID:   s.SourceID(),
			SourceMetadata: &source_metadatapb.MetaData{
				Data: &source_metadatapb.MetaData_Docker{
					Docker: &source_metadatapb.Docker{
						File:  "/" + info.name,
						Image: info.layer.base,
						Tag:   info.layer.tag,
						Layer: info.layer.digest.String(),
					},
				},
			},
			Verify: s.verify,
		}
		chunk.Data = data.Bytes()

		if err := common.CancellableWrite(ctx, chunksChan, chunk); err != nil {
			return err
		}
	}

	return nil
}

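// remoteOpts maps the configured credential type onto the corresponding
// go-containerregistry remote options.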
func (s *Source) remoteOpts() ([]remote.Option, error) {
	switch s.conn.GetCredential().(type) {
	case *sourcespb.Docker_Unauthenticated:
		return nil, nil
	case *sourcespb.Docker_BasicAuth:
		return []remote.Option{
			remote.WithAuth(&authn.Basic{
				Username: s.conn.GetBasicAuth().GetUsername(),
				Password: s.conn.GetBasicAuth().GetPassword(),
			}),
		}, nil
	case *sourcespb.Docker_BearerToken:
		return []remote.Option{
			remote.WithAuth(&authn.Bearer{
				Token: s.conn.GetBearerToken(),
			}),
		}, nil
	case *sourcespb.Docker_DockerKeychain:
		return []remote.Option{
			remote.WithAuthFromKeychain(authn.DefaultKeychain),
		}, nil
	default:
		return nil, fmt.Errorf("unknown credential type: %T", s.conn.Credential)
	}
}

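// baseAndTagFromImage splits an image reference into its base name and either
// its tag or its digest, reporting whether the reference was digest-pinned.
// For example, "alpine:3.18" yields ("alpine", "3.18", false), while
// "alpine@sha256:<digest>" yields ("alpine", "sha256:<digest>", true).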
func baseAndTagFromImage(image string) (base, tag string, hasDigest bool) {
	if base, tag, hasDigest = extractDigest(image); hasDigest {
		return base, tag, true
	}

	base, tag = extractTagOrUseDefault(image)
	return base, tag, false
}

// extractDigest tries to split the image string on the digest delimiter.
// If successful, it means the image has a digest.
func extractDigest(image string) (base, tag string, hasDigest bool) {
	const digestDelim = "@"

	if parts := strings.SplitN(image, digestDelim, 2); len(parts) > 1 {
		return parts[0], parts[1], true
	}
	return "", "", false
}

// extractTagOrUseDefault extracts the tag from the image string.
// If no tag is found, it defaults to "latest".
func extractTagOrUseDefault(image string) (base, tag string) {
	const (
		tagDelim     = ":"
		regRepoDelim = "/"
	)

	parts := strings.Split(image, tagDelim)

	// A colon only marks a tag if the final segment contains no "/";
	// otherwise the colon belongs to a registry hostname with a port,
	// e.g. "registry.example.com:5000/repo". This is only weak validation.
	if len(parts) > 1 && !strings.Contains(parts[len(parts)-1], regRepoDelim) {
		return strings.Join(parts[:len(parts)-1], tagDelim), parts[len(parts)-1]
	}
	return image, "latest"
}