trufflehog/pkg/sources/resume.go
Miccah d317ddb51a
[chore] Remove logrus from circleci, filesystem, gitlab, and s3 sources (#1089)
* [chore] Remove logrus from circleci, filesystem, gitlab, and s3 sources

* Address comments
2023-02-10 11:02:55 -06:00

81 lines
2.8 KiB
Go

package sources
import (
"strings"
)
// RemoveRepoFromResumeInfo drops the first entry equal to repoURL from resumeRepos
// and returns the shortened slice.
//
// The removal happens in place: the elements after the match are shifted one
// position to the left inside the backing array of resumeRepos, so callers should
// use the returned slice rather than the one they passed in.
// If repoURL is not present, resumeRepos is returned untouched.
func RemoveRepoFromResumeInfo(resumeRepos []string, repoURL string) []string {
	for pos := range resumeRepos {
		if resumeRepos[pos] != repoURL {
			continue
		}
		// Close the gap left by the match, then trim the now-duplicated last slot.
		copy(resumeRepos[pos:], resumeRepos[pos+1:])
		return resumeRepos[:len(resumeRepos)-1]
	}
	// Not expected in practice: the repo was never recorded in the resume info,
	// so there is nothing to remove.
	return resumeRepos
}
// FilterReposToResume filters the existing repos down to those that still need to be
// scanned according to the encoded resume info.
//
// It returns the new slice of repos to be scanned, together with the difference between
// the original length of repos and the length of that new slice, for use as a progress
// reporting offset.
//
// Both the repos encoded in resumeInfo and the existing repos must be sorted in
// ascending order. An empty resumeInfo returns repos unchanged with an offset of 0.
func FilterReposToResume(repos []string, resumeInfo string) (reposToScan []string, progressOffsetCount int) {
	if resumeInfo == "" {
		return repos, 0
	}
	// resumeInfo is non-empty here, so the decoded slice has at least one element.
	resumeInfoSlice := DecodeResumeInfo(resumeInfo)

	// Because this scanner is multithreaded, it is possible that we have scanned a range of repositories
	// with some gaps of unlisted but completed repositories in between the ones in resumeInfo.
	// So we know repositories that have not finished scanning are the ones included in the resumeInfo,
	// and those that come after the last repository in the resumeInfo.
	// However, it is possible that a resumed scan does not include all or even any of the repos within the resumeInfo.
	// In this case, we must ensure we still scan all repos that come after the last found repo in the list.
	lastFoundRepoIndex := -1
	resumeRepoIndex := 0
	for i, repoURL := range repos {
		// Skip every resume entry that sorts before repoURL. A single repo may jump past
		// several resume entries that are absent from repos; advancing only one step per
		// repo would leave the cursor lagging and make later matches go unnoticed.
		for resumeRepoIndex < len(resumeInfoSlice) && repoURL > resumeInfoSlice[resumeRepoIndex] {
			resumeRepoIndex++
		}

		// If we've gone past all of the resume entries, end the filter.
		if resumeRepoIndex == len(resumeInfoSlice) {
			break
		}

		// If the repoURL is the one we're looking for, add it and update the lastFoundRepoIndex.
		if repoURL == resumeInfoSlice[resumeRepoIndex] {
			lastFoundRepoIndex = i
			reposToScan = append(reposToScan, repoURL)
		}
	}

	// Append all repos after the last one we've found. When nothing was found
	// (lastFoundRepoIndex == -1) this appends every repo.
	reposToScan = append(reposToScan, repos[lastFoundRepoIndex+1:]...)
	progressOffsetCount = len(repos) - len(reposToScan)
	return reposToScan, progressOffsetCount
}
// EncodeResumeInfo serializes resumeInfoSlice into a single string by joining
// its entries with a tab character.
func EncodeResumeInfo(resumeInfoSlice []string) string {
	const separator = "\t"
	encoded := strings.Join(resumeInfoSlice, separator)
	return encoded
}
// DecodeResumeInfo splits a tab-separated resumeInfo string into its entries.
// An empty resumeInfo yields a nil slice.
func DecodeResumeInfo(resumeInfo string) []string {
	if len(resumeInfo) > 0 {
		return strings.Split(resumeInfo, "\t")
	}
	// strings.Split("", "\t") would produce []string{""}, a slice holding one
	// empty entry; no resume info must decode to no entries at all.
	return nil
}