mirror of
https://github.com/trufflesecurity/trufflehog.git
synced 2024-11-10 07:04:24 +00:00
Add disk buffer tempfile cleanup (#2130)
* add tempfile creation - break PID retrieval into sep. function * add tmpfile cleanup func * add file cleanup to main cleanup func * refactor file logic to only return name string * add temp buffer naming to gcs * add temp buffer naming to s3 * add temp buffer naming to filesystem * add temp buffer naming to git * consolidate cleanup functions - have single function handle both files and dirs - remove interface(not needed with a single func implementation) - change calls to `New(...)` to reflect config implementation - simplify automation in main.go - update disk-buffer-reader dependency * integrate changes from pr #2133 * merge main * checkout from main to revert conflict issues * re-add buffer logic to git * interface no longer needed * move string format to global const --------- Co-authored-by: Ahrav Dutta <ahrav.dutta@trufflesec.com>
This commit is contained in:
parent
405f356071
commit
53f060a08e
6 changed files with 57 additions and 33 deletions
1
main.go
1
main.go
|
@ -220,7 +220,6 @@ func main() {
|
|||
ctx := context.Background()
|
||||
|
||||
go cleantemp.RunCleanupLoop(ctx)
|
||||
|
||||
}
|
||||
|
||||
func run(state overseer.State) {
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package cleantemp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
@ -15,11 +14,16 @@ import (
|
|||
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
||||
)
|
||||
|
||||
// Naming components shared by the temp directories and temp file names this
// package produces.
const (
|
||||
// defaultExecPath is the name placed at the front of generated artifact
// names; it is also the fallback used when os.Executable returns an error.
defaultExecPath = "trufflehog"
|
||||
// defaultArtifactPrefixFormat renders "<name>-<pid>-" from a name string
// and an integer PID.
defaultArtifactPrefixFormat = "%s-%d-"
|
||||
)
|
||||
|
||||
// MkdirTemp returns a temporary directory path formatted as:
|
||||
// trufflehog-<pid>-<randint>
|
||||
func MkdirTemp() (string, error) {
|
||||
pid := os.Getpid()
|
||||
tmpdir := fmt.Sprintf("%s-%d-", "trufflehog", pid)
|
||||
tmpdir := fmt.Sprintf(defaultArtifactPrefixFormat, defaultExecPath, pid)
|
||||
dir, err := os.MkdirTemp(os.TempDir(), tmpdir)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -27,27 +31,26 @@ func MkdirTemp() (string, error) {
|
|||
return dir, nil
|
||||
}
|
||||
|
||||
// CleanTemp is used to remove orphaned artifacts from aborted scans.
|
||||
type CleanTemp interface {
|
||||
// CleanTempDir removes orphaned directories from sources. ex: Git
|
||||
CleanTempDir(ctx logContext.Context, dirName string, pid int) error
|
||||
// CleanTempFiles removes orphaned files/artifacts from sources. ex: Artifactory
|
||||
CleanTempFiles(ctx context.Context, fileName string, pid int) error
|
||||
// Unlike MkdirTemp, we only want to generate the filename string.
|
||||
// The tempfile creation in trufflehog we're interested in
|
||||
// is generally handled by "github.com/trufflesecurity/disk-buffer-reader"
|
||||
func MkFilename() string {
|
||||
pid := os.Getpid()
|
||||
filename := fmt.Sprintf(defaultArtifactPrefixFormat, defaultExecPath, pid)
|
||||
return filename
|
||||
}
|
||||
|
||||
// Only compile during startup.
|
||||
var trufflehogRE = regexp.MustCompile(`^trufflehog-\d+-\d+$`)
|
||||
|
||||
// CleanTempDir removes orphaned temp directories that do not contain running PID values.
|
||||
func CleanTempDir(ctx logContext.Context) error {
|
||||
const defaultExecPath = "trufflehog"
|
||||
// CleanTempArtifacts deletes orphaned temp directories and files that do not contain running PID values.
|
||||
func CleanTempArtifacts(ctx logContext.Context) error {
|
||||
executablePath, err := os.Executable()
|
||||
if err != nil {
|
||||
executablePath = defaultExecPath
|
||||
}
|
||||
execName := filepath.Base(executablePath)
|
||||
|
||||
// Finds other trufflehog PIDs that may be running
|
||||
var pids []string
|
||||
procs, err := ps.Processes()
|
||||
if err != nil {
|
||||
|
@ -61,40 +64,49 @@ func CleanTempDir(ctx logContext.Context) error {
|
|||
}
|
||||
|
||||
tempDir := os.TempDir()
|
||||
dirs, err := os.ReadDir(tempDir)
|
||||
artifacts, err := os.ReadDir(tempDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading temp dir: %w", err)
|
||||
}
|
||||
|
||||
for _, dir := range dirs {
|
||||
// Ensure that all directories match the pattern.
|
||||
if trufflehogRE.MatchString(dir.Name()) {
|
||||
// Mark these directories initially as ones that should be deleted.
|
||||
for _, artifact := range artifacts {
|
||||
if trufflehogRE.MatchString(artifact.Name()) {
|
||||
// Mark these artifacts initially as ones that should be deleted.
|
||||
shouldDelete := true
|
||||
// If they match any live PIDs, mark as should not delete.
|
||||
// Check if the name matches any live PIDs.
|
||||
for _, pidval := range pids {
|
||||
if strings.Contains(dir.Name(), fmt.Sprintf("-%s-", pidval)) {
|
||||
if strings.Contains(artifact.Name(), fmt.Sprintf("-%s-", pidval)) {
|
||||
shouldDelete = false
|
||||
// break out so we can still delete directories even if no other Trufflehog processes are running.
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if shouldDelete {
|
||||
dirPath := filepath.Join(tempDir, dir.Name())
|
||||
if err := os.RemoveAll(dirPath); err != nil {
|
||||
return fmt.Errorf("error deleting temp directory: %s", dirPath)
|
||||
artifactPath := filepath.Join(tempDir, artifact.Name())
|
||||
|
||||
var err error
|
||||
if artifact.IsDir() {
|
||||
err = os.RemoveAll(artifactPath)
|
||||
} else {
|
||||
err = os.Remove(artifactPath)
|
||||
}
|
||||
ctx.Logger().V(1).Info("Deleted directory", "directory", dirPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error deleting temp artifact: %s", artifactPath)
|
||||
}
|
||||
|
||||
ctx.Logger().Info("Deleted orphaned temp artifact", "artifact", artifactPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunCleanupLoop runs a loop that cleans up orphaned directories every 15 seconds
|
||||
// RunCleanupLoop runs a loop that cleans up orphaned directories every 15 seconds.
|
||||
func RunCleanupLoop(ctx logContext.Context) {
|
||||
if err := CleanTempDir(ctx); err != nil {
|
||||
ctx.Logger().Error(err, "error cleaning up orphaned directories ")
|
||||
err := CleanTempArtifacts(ctx)
|
||||
if err != nil {
|
||||
ctx.Logger().Error(err, "Error cleaning up orphaned directories ")
|
||||
}
|
||||
|
||||
const cleanupLoopInterval = 15 * time.Second
|
||||
|
@ -104,7 +116,7 @@ func RunCleanupLoop(ctx logContext.Context) {
|
|||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
if err := CleanTempDir(ctx); err != nil {
|
||||
if err := CleanTempArtifacts(ctx); err != nil {
|
||||
ctx.Logger().Error(err, "error cleaning up orphaned directories")
|
||||
}
|
||||
case <-ctx.Done():
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"google.golang.org/protobuf/proto"
|
||||
"google.golang.org/protobuf/types/known/anypb"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
|
||||
|
@ -149,10 +150,13 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou
|
|||
if err != nil {
|
||||
return fmt.Errorf("unable to open file: %w", err)
|
||||
}
|
||||
|
||||
bufferName := cleantemp.MkFilename()
|
||||
|
||||
defer inputFile.Close()
|
||||
logger.V(3).Info("scanning file")
|
||||
|
||||
reReader, err := diskbufferreader.New(inputFile)
|
||||
reReader, err := diskbufferreader.New(inputFile, diskbufferreader.WithBufferName(bufferName))
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not create re-readable reader: %w", err)
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import (
|
|||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/cache"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/cache/memory"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb"
|
||||
|
@ -369,7 +370,8 @@ func (s *Source) processObject(ctx context.Context, o object) error {
|
|||
}
|
||||
|
||||
func (s *Source) readObjectData(ctx context.Context, o object, chunk *sources.Chunk) ([]byte, error) {
|
||||
reader, err := diskbufferreader.New(o)
|
||||
bufferName := cleantemp.MkFilename()
|
||||
reader, err := diskbufferreader.New(o, diskbufferreader.WithBufferName(bufferName))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating disk buffer reader: %w", err)
|
||||
}
|
||||
|
|
|
@ -1047,10 +1047,14 @@ func handleBinary(ctx context.Context, gitDir string, reporter sources.ChunkRepo
|
|||
fileCtx.Logger().V(2).Info("Max archive size reached.")
|
||||
}
|
||||
|
||||
reader, err := diskbufferreader.New(&fileContent)
|
||||
bufferName := cleantemp.MkFilename()
|
||||
|
||||
reader, err := diskbufferreader.New(&fileContent, diskbufferreader.WithBufferName(bufferName))
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer reader.Close()
|
||||
|
||||
if handlers.HandleFile(fileCtx, reader, chunkSkel, reporter) {
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"google.golang.org/protobuf/proto"
|
||||
"google.golang.org/protobuf/types/known/anypb"
|
||||
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/cleantemp"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
|
||||
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
|
||||
|
@ -345,8 +346,10 @@ func (s *Source) pageChunker(ctx context.Context, client *s3.S3, chunksChan chan
|
|||
return nil
|
||||
}
|
||||
|
||||
bufferName := cleantemp.MkFilename()
|
||||
|
||||
defer res.Body.Close()
|
||||
reader, err := diskbufferreader.New(res.Body)
|
||||
reader, err := diskbufferreader.New(res.Body, diskbufferreader.WithBufferName(bufferName))
|
||||
if err != nil {
|
||||
s.log.Error(err, "Could not create reader.")
|
||||
return nil
|
||||
|
|
Loading…
Reference in a new issue