Add disk buffer tempfile cleanup (#2130)

* add tempfile creation

- break PID retrieval into a separate function

* add tmpfile cleanup func

* add file cleanup to main cleanup func

* refactor file logic to only return name string

* add temp buffer naming to gcs

* add temp buffer naming to s3

* add temp buffer naming to filesystem

* add temp buffer naming to git

* consolidate cleanup functions

- have single function handle both files and dirs
- remove interface (not needed with a single function implementation)
- change calls to `New(...)` to reflect config implementation
- simplify automation in main.go
- update disk-buffer-reader dependency

* integrate changes from pr #2133

* merge main

* checkout from main to revert conflict issues

* re-add buffer logic to git

* interface no longer needed

* move string format to global const

---------

Co-authored-by: Ahrav Dutta <ahrav.dutta@trufflesec.com>
This commit is contained in:
Mike Vanbuskirk 2023-12-11 17:31:50 -06:00 committed by GitHub
parent 405f356071
commit 53f060a08e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 57 additions and 33 deletions

View file

@ -220,7 +220,6 @@ func main() {
ctx := context.Background()
go cleantemp.RunCleanupLoop(ctx)
}
func run(state overseer.State) {

View file

@ -1,7 +1,6 @@
package cleantemp
import (
"context"
"fmt"
"os"
"path/filepath"
@ -15,11 +14,16 @@ import (
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
)
const (
defaultExecPath = "trufflehog"
defaultArtifactPrefixFormat = "%s-%d-"
)
// MkdirTemp returns a temporary directory path formatted as:
// trufflehog-<pid>-<randint>
func MkdirTemp() (string, error) {
pid := os.Getpid()
tmpdir := fmt.Sprintf("%s-%d-", "trufflehog", pid)
tmpdir := fmt.Sprintf(defaultArtifactPrefixFormat, defaultExecPath, pid)
dir, err := os.MkdirTemp(os.TempDir(), tmpdir)
if err != nil {
return "", err
@ -27,27 +31,26 @@ func MkdirTemp() (string, error) {
return dir, nil
}
// CleanTemp is used to remove orphaned artifacts from aborted scans.
type CleanTemp interface {
// CleanTempDir removes orphaned directories from sources. ex: Git
CleanTempDir(ctx logContext.Context, dirName string, pid int) error
// CleanTempFiles removes orphaned files/artifacts from sources. ex: Artifactory
CleanTempFiles(ctx context.Context, fileName string, pid int) error
// MkFilename returns the name prefix for a temporary file, formatted as:
// trufflehog-<pid>-
// Unlike MkdirTemp, it creates nothing on disk and only builds the string.
// The tempfile creation in trufflehog that this prefix is meant for
// is generally handled by "github.com/trufflesecurity/disk-buffer-reader".
func MkFilename() string {
	return fmt.Sprintf(defaultArtifactPrefixFormat, defaultExecPath, os.Getpid())
}
// Only compile during startup.
var trufflehogRE = regexp.MustCompile(`^trufflehog-\d+-\d+$`)
// CleanTempDir removes orphaned temp directories that do not contain running PID values.
func CleanTempDir(ctx logContext.Context) error {
const defaultExecPath = "trufflehog"
// CleanTempArtifacts deletes orphaned temp directories and files that do not contain running PID values.
func CleanTempArtifacts(ctx logContext.Context) error {
executablePath, err := os.Executable()
if err != nil {
executablePath = defaultExecPath
}
execName := filepath.Base(executablePath)
// Finds other trufflehog PIDs that may be running
var pids []string
procs, err := ps.Processes()
if err != nil {
@ -61,40 +64,49 @@ func CleanTempDir(ctx logContext.Context) error {
}
tempDir := os.TempDir()
dirs, err := os.ReadDir(tempDir)
artifacts, err := os.ReadDir(tempDir)
if err != nil {
return fmt.Errorf("error reading temp dir: %w", err)
}
for _, dir := range dirs {
// Ensure that all directories match the pattern.
if trufflehogRE.MatchString(dir.Name()) {
// Mark these directories initially as ones that should be deleted.
for _, artifact := range artifacts {
if trufflehogRE.MatchString(artifact.Name()) {
// Mark these artifacts initially as ones that should be deleted.
shouldDelete := true
// If they match any live PIDs, mark as should not delete.
// Check if the name matches any live PIDs.
for _, pidval := range pids {
if strings.Contains(dir.Name(), fmt.Sprintf("-%s-", pidval)) {
if strings.Contains(artifact.Name(), fmt.Sprintf("-%s-", pidval)) {
shouldDelete = false
// break out so we can still delete directories even if no other Trufflehog processes are running.
break
}
}
if shouldDelete {
dirPath := filepath.Join(tempDir, dir.Name())
if err := os.RemoveAll(dirPath); err != nil {
return fmt.Errorf("error deleting temp directory: %s", dirPath)
artifactPath := filepath.Join(tempDir, artifact.Name())
var err error
if artifact.IsDir() {
err = os.RemoveAll(artifactPath)
} else {
err = os.Remove(artifactPath)
}
ctx.Logger().V(1).Info("Deleted directory", "directory", dirPath)
if err != nil {
return fmt.Errorf("Error deleting temp artifact: %s", artifactPath)
}
ctx.Logger().Info("Deleted orphaned temp artifact", "artifact", artifactPath)
}
}
}
return nil
}
// RunCleanupLoop runs a loop that cleans up orphaned directories every 15 seconds
// RunCleanupLoop runs a loop that cleans up orphaned temp directories and files every 15 seconds.
func RunCleanupLoop(ctx logContext.Context) {
if err := CleanTempDir(ctx); err != nil {
ctx.Logger().Error(err, "error cleaning up orphaned directories ")
err := CleanTempArtifacts(ctx)
if err != nil {
ctx.Logger().Error(err, "Error cleaning up orphaned directories ")
}
const cleanupLoopInterval = 15 * time.Second
@ -104,7 +116,7 @@ func RunCleanupLoop(ctx logContext.Context) {
for {
select {
case <-ticker.C:
if err := CleanTempDir(ctx); err != nil {
if err := CleanTempArtifacts(ctx); err != nil {
ctx.Logger().Error(err, "error cleaning up orphaned directories")
}
case <-ctx.Done():

View file

@ -13,6 +13,7 @@ import (
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
@ -149,10 +150,13 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou
if err != nil {
return fmt.Errorf("unable to open file: %w", err)
}
bufferName := cleantemp.MkFilename()
defer inputFile.Close()
logger.V(3).Info("scanning file")
reReader, err := diskbufferreader.New(inputFile)
reReader, err := diskbufferreader.New(inputFile, diskbufferreader.WithBufferName(bufferName))
if err != nil {
return fmt.Errorf("could not create re-readable reader: %w", err)
}

View file

@ -20,6 +20,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/cache"
"github.com/trufflesecurity/trufflehog/v3/pkg/cache/memory"
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb"
@ -369,7 +370,8 @@ func (s *Source) processObject(ctx context.Context, o object) error {
}
func (s *Source) readObjectData(ctx context.Context, o object, chunk *sources.Chunk) ([]byte, error) {
reader, err := diskbufferreader.New(o)
bufferName := cleantemp.MkFilename()
reader, err := diskbufferreader.New(o, diskbufferreader.WithBufferName(bufferName))
if err != nil {
return nil, fmt.Errorf("error creating disk buffer reader: %w", err)
}

View file

@ -1047,10 +1047,14 @@ func handleBinary(ctx context.Context, gitDir string, reporter sources.ChunkRepo
fileCtx.Logger().V(2).Info("Max archive size reached.")
}
reader, err := diskbufferreader.New(&fileContent)
bufferName := cleantemp.MkFilename()
reader, err := diskbufferreader.New(&fileContent, diskbufferreader.WithBufferName(bufferName))
if err != nil {
return err
}
defer reader.Close()
if handlers.HandleFile(fileCtx, reader, chunkSkel, reporter) {

View file

@ -21,6 +21,7 @@ import (
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
@ -345,8 +346,10 @@ func (s *Source) pageChunker(ctx context.Context, client *s3.S3, chunksChan chan
return nil
}
bufferName := cleantemp.MkFilename()
defer res.Body.Close()
reader, err := diskbufferreader.New(res.Body)
reader, err := diskbufferreader.New(res.Body, diskbufferreader.WithBufferName(bufferName))
if err != nil {
s.log.Error(err, "Could not create reader.")
return nil