trufflehog/pkg/output/legacy_json.go
Brendan Shaklovitz fad34d4dc6
git worktree scanning fix for #827 (#1315)
* Fix worktree scan by setting EnableDotGitCommonDir

* Change `PlainOpenOptions` to set `EnableDotGitCommonDir` to true.
  In every current usage of this function, it is on an already-cloned
  repository, so it should always be valid to have this set. By doing
  so, it should fix some issues with worktrees.

* Remove unused go.mod replace directives

* Remove replace directives for libraries that are not in use.
2023-05-09 08:00:47 -07:00

236 lines
7.3 KiB
Go

package output
import (
"encoding/json"
"fmt"
"log"
"net/url"
"os"
"strings"
gogit "github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
)
// PrintLegacyJSON writes a single result to stdout as one line of JSON in the
// legacy (pre-v3) output format.
//
// Only git, GitHub and GitLab sources are supported; any other source type
// yields an error. The repository named in the result's metadata is prepared
// again with git.PrepareRepo so ConvertToLegacyJSON can look up commit details
// that the current result format does not carry. When git.PrepareRepo returns
// its remote flag set, the prepared directory is removed on return.
func PrintLegacyJSON(ctx context.Context, r *detectors.ResultWithMetadata) error {
	var repo string
	switch r.SourceType {
	case sourcespb.SourceType_SOURCE_TYPE_GIT:
		repo = r.SourceMetadata.GetGit().Repository
	case sourcespb.SourceType_SOURCE_TYPE_GITHUB:
		repo = r.SourceMetadata.GetGithub().Repository
	case sourcespb.SourceType_SOURCE_TYPE_GITLAB:
		repo = r.SourceMetadata.GetGitlab().Repository
	default:
		return fmt.Errorf("unsupported source type for legacy json output: %s", r.SourceType)
	}

	// cloning the repo again here is not great and only works with unauthed repos
	repoPath, remote, err := git.PrepareRepo(ctx, repo)
	if err != nil {
		return fmt.Errorf("error preparing git repo for scanning: %w", err)
	}
	// An empty path without an error is reported separately: wrapping a nil
	// error with %w would render as "%!w(<nil>)" in the message.
	if repoPath == "" {
		return fmt.Errorf("error preparing git repo for scanning: empty repo path returned for %q", repo)
	}
	if remote {
		defer os.RemoveAll(repoPath)
	}

	legacy, err := ConvertToLegacyJSON(r, repoPath)
	if err != nil {
		return fmt.Errorf("could not convert to legacy JSON: %w", err)
	}
	out, err := json.Marshal(legacy)
	if err != nil {
		return fmt.Errorf("could not marshal result: %w", err)
	}
	fmt.Println(string(out))
	return nil
}
// ConvertToLegacyJSON builds the legacy (pre-v3) JSON representation of a
// result.
//
// r must come from a git, GitHub or GitLab source; other source types yield an
// error. repoPath is opened with go-git (detecting an enclosing .git directory
// and enabling the common dir used by worktrees) to read the commit message,
// committer date, branch and diff that the current result format omits.
func ConvertToLegacyJSON(r *detectors.ResultWithMetadata, repoPath string) (*LegacyJSONOutput, error) {
	var source LegacyJSONCompatibleSource
	switch r.SourceType {
	case sourcespb.SourceType_SOURCE_TYPE_GIT:
		source = r.SourceMetadata.GetGit()
	case sourcespb.SourceType_SOURCE_TYPE_GITHUB:
		source = r.SourceMetadata.GetGithub()
	case sourcespb.SourceType_SOURCE_TYPE_GITLAB:
		source = r.SourceMetadata.GetGitlab()
	default:
		return nil, fmt.Errorf("legacy JSON output can not be used with this source: %s", r.SourceName)
	}

	options := &gogit.PlainOpenOptions{
		DetectDotGit:          true,
		EnableDotGitCommonDir: true,
	}
	// The repo will be needed to gather info needed for the legacy output that
	// isn't included in the new output format.
	repo, err := gogit.PlainOpenWithOptions(repoPath, options)
	if err != nil {
		return nil, fmt.Errorf("could not open repo %q: %w", repoPath, err)
	}

	fileName := source.GetFile()
	commitHash := plumbing.NewHash(source.GetCommit())
	commit, err := repo.CommitObject(commitHash)
	if err != nil {
		// NOTE(review): log.Fatal exits the process, so deferred cleanup in
		// callers does not run. Returning a wrapped error would be preferable,
		// but this is the file's only use of the "log" import, so that change
		// must be made together with removing the import.
		log.Fatal(err)
	}

	diff := GenerateDiff(commit, fileName)
	foundString := string(r.Result.Raw)

	// Add highlighting to the offending bit of string. An empty match is
	// skipped: ReplaceAll with an empty needle would insert the escape codes
	// between every character of the diff.
	printableDiff := diff
	if foundString != "" {
		printableDiff = strings.ReplaceAll(diff, foundString, fmt.Sprintf("\u001b[93m%s\u001b[0m", foundString))
	}

	// Load up the struct to match the old JSON format
	output := &LegacyJSONOutput{
		Branch:       FindBranch(commit, repo),
		Commit:       commit.Message,
		CommitHash:   commitHash.String(),
		Date:         commit.Committer.When.Format("2006-01-02 15:04:05"),
		Diff:         diff,
		Path:         fileName,
		PrintDiff:    printableDiff,
		Reason:       r.Result.DetectorType.String(),
		StringsFound: []string{foundString},
	}
	return output, nil
}
// BranchHeads creates a map of branch names to their head commit. This can be
// used to find if a commit is an ancestor of a branch head.
//
// Keys are the full reference names as reported by the branch iterator.
// Branches whose head cannot be resolved or loaded are logged and skipped
// rather than aborting the walk. The returned error is the one from listing or
// iterating the branches; the map is non-nil even when an error is returned.
func BranchHeads(repo *gogit.Repository) (map[string]*object.Commit, error) {
	branches := map[string]*object.Commit{}
	branchIter, err := repo.Branches()
	if err != nil {
		return branches, err
	}

	logger := context.Background().Logger()
	err = branchIter.ForEach(func(branchRef *plumbing.Reference) error {
		branchName := branchRef.Name().String()
		headHash, err := repo.ResolveRevision(plumbing.Revision(branchName))
		if err != nil {
			logger.Error(err, "unable to resolve head of branch", "branch", branchName)
			return nil
		}
		headCommit, err := repo.CommitObject(*headHash)
		if err != nil {
			// headCommit is nil on failure, so log the hash that was looked up
			// instead of calling a method on the nil commit.
			logger.Error(err, "unable to get commit", "commit", headHash.String())
			return nil
		}
		branches[branchName] = headCommit
		return nil
	})
	return branches, err
}
// FindBranch returns the name of a branch whose head has commit as an
// ancestor, or "" when no such branch is found. Not the most accurate, but it
// should work similar to pre v3.0.
//
// When several branches qualify, the lexicographically smallest name is
// returned so that repeated runs over the same repository give the same
// answer; map iteration order alone would make the choice random.
//
// If the branches cannot be listed the error is logged and the process exits
// with status 1.
func FindBranch(commit *object.Commit, repo *gogit.Repository) string {
	logger := context.Background().Logger()
	branches, err := BranchHeads(repo)
	if err != nil {
		logger.Error(err, "could not list branches")
		os.Exit(1)
	}

	best := ""
	for name, head := range branches {
		// A name that does not sort before the current best cannot change the
		// result, so skip the ancestry walk for it.
		if best != "" && name >= best {
			continue
		}
		isAncestor, err := commit.IsAncestor(head)
		if err != nil {
			logger.Error(err, fmt.Sprintf("could not determine if %s is an ancestor of %s", commit.Hash.String(), head.Hash.String()))
			continue
		}
		if isAncestor {
			best = name
		}
	}
	return best
}
// GenerateDiff will take a commit and create a string diff of fileName between
// the commit and its first parent.
//
// If the commit has no parent, or the file does not exist in the parent, the
// file is diffed against empty content. Failures are logged rather than
// returned: an empty string is returned when the file cannot be loaded from
// the commit or (for reasons other than "not found") from the parent.
func GenerateDiff(commit *object.Commit, fileName string) string {
	logger := context.Background().Logger().WithValues("file", fileName)

	// First grab the first parent of the commit. If there are none, we are at
	// the first commit and should diff against an empty file.
	parent, err := commit.Parent(0)
	if err != nil && err != object.ErrParentNotFound {
		logger.Error(err, "could not find parent", "commit", commit.Hash.String())
	}

	// Now get the files from the commit and its parent.
	var parentFile *object.File
	if parent != nil {
		parentFile, err = parent.File(fileName)
		if err != nil && err != object.ErrFileNotFound {
			logger.Error(err, "could not get previous version of file")
			return ""
		}
	}
	commitFile, err := commit.File(fileName)
	if err != nil {
		logger.Error(err, "could not get current version of file")
		return ""
	}

	var oldContent, newContent string
	if parentFile != nil {
		oldContent, err = parentFile.Contents()
		if err != nil {
			logger.Error(err, "could not get contents of previous version of file")
		}
	}
	// commitFile should never be nil at this point, but double-checking so we
	// don't get a nil error.
	if commitFile != nil {
		// Capture this call's own error; checking a stale err would hide a
		// read failure here or re-report an earlier one.
		newContent, err = commitFile.Contents()
		if err != nil {
			logger.Error(err, "could not get contents of current version of file")
		}
	}

	// go-git doesn't support creating a diff for just one file in a commit, so
	// another package is needed to generate the diff.
	// If anything has gone wrong above, we'll just be diffing two empty files.
	dmp := diffmatchpatch.New()
	diffs := dmp.DiffMain(oldContent, newContent, false)
	patches := dmp.PatchMake(diffs)

	// Put all the pieces of the diff together into one string.
	var sb strings.Builder
	for _, patch := range patches {
		// The String() method percent-escapes the diff, so it needs to be
		// undone. PathUnescape is used because QueryUnescape would also turn
		// every '+' (the insertion markers and hunk header) into a space.
		patchDiff, err := url.PathUnescape(patch.String())
		if err != nil {
			logger.Error(err, "unable to unescape diff")
		}
		sb.WriteString(patchDiff)
	}
	return sb.String()
}
// LegacyJSONOutput is the shape of a single finding in the legacy (pre-v3)
// JSON output. It is populated by ConvertToLegacyJSON.
type LegacyJSONOutput struct {
	// Branch is the branch name reported by FindBranch; empty if none matched.
	Branch string `json:"branch"`
	// Commit is the commit message.
	Commit string `json:"commit"`
	// CommitHash is the string form of the commit hash.
	CommitHash string `json:"commitHash"`
	// Date is the committer timestamp formatted as "2006-01-02 15:04:05".
	Date string `json:"date"`
	// Diff is the output of GenerateDiff for the file in this commit.
	Diff string `json:"diff"`
	// Path is the file name taken from the source metadata.
	Path string `json:"path"`
	// PrintDiff is Diff with the found string wrapped in ANSI color codes.
	PrintDiff string `json:"printDiff"`
	// Reason is the string form of the detector type that produced the result.
	Reason string `json:"reason"`
	// StringsFound holds the raw matched string of the result.
	StringsFound []string `json:"stringsFound"`
}
// LegacyJSONCompatibleSource is the subset of source metadata needed to build
// legacy JSON output. ConvertToLegacyJSON assigns the git, GitHub and GitLab
// metadata values to it.
type LegacyJSONCompatibleSource interface {
	// GetCommit returns the commit identifier; it is parsed as a hash by
	// ConvertToLegacyJSON.
	GetCommit() string
	// GetFile returns the file name used to look the file up in the commit.
	GetFile() string
}