syft/internal/file/tar_file_traversal.go
Alex Goodman 38c4b17847
Add support for searching for jars within archives (#734)
* add support for searching jars within archives

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add package cataloger config options

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* address review comments + factor out safeCopy helper

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update config docs regarding package archive search options

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* show that unindexed archive cataloging defaults to false

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* remove lies about -s

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* address review comments

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update search archive note about java

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
2022-01-06 21:40:51 +00:00

64 lines
1.9 KiB
Go

package file
import (
"fmt"
"io/ioutil"
"path/filepath"
"github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archiver/v3"
)
// ExtractGlobsFromTarToUniqueTempFile walks the archive at archivePath and copies every non-directory entry whose
// name matches at least one of the given globs into its own uniquely named temp file created within dir. The
// returned map is keyed by the entry name (as reported by the archive entry) and holds an Opener that references
// the temp file written for that entry.
//
// If no globs are given the archive is never read and an empty map is returned. Errors raised while creating,
// writing, or closing a temp file are handed back to archiver.Walk, and whatever archiver.Walk returns is returned
// to the caller alongside the entries collected so far.
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
	results := make(map[string]Opener)

	// don't allow for full traversal, only select traversal from given paths
	if len(globs) == 0 {
		return results, nil
	}

	visitor := func(file archiver.File) error {
		defer file.Close()

		// ignore directories
		if file.FileInfo.IsDir() {
			return nil
		}

		// ignore any filename that doesn't match the given globs...
		if !matchesAnyGlob(file.Name(), globs...) {
			return nil
		}

		// we have a file we want to extract....
		tempfilePrefix := filepath.Base(filepath.Clean(file.Name())) + "-"
		tempFile, err := ioutil.TempFile(dir, tempfilePrefix)
		if err != nil {
			return fmt.Errorf("unable to create temp file: %w", err)
		}

		// we shouldn't try and keep the tempfile open as the returned result may have several files, which takes up
		// resources (leading to "too many open files"). Instead we'll return a file opener to the caller which
		// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
		//
		// The temp file is closed explicitly (not deferred) so that a failure to close a file we have just written
		// is reported rather than silently dropped.
		copyErr := safeCopy(tempFile, file.ReadCloser)
		closeErr := tempFile.Close()

		if copyErr != nil {
			return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, copyErr)
		}
		if closeErr != nil {
			return fmt.Errorf("unable to close temp file=%q for tar=%q: %w", tempFile.Name(), archivePath, closeErr)
		}

		// only record the entry once its contents have been fully written and the file closed
		results[file.Name()] = Opener{path: tempFile.Name()}

		return nil
	}

	return results, archiver.Walk(archivePath, visitor)
}
// matchesAnyGlob reports whether name satisfies at least one of the given glob patterns, as evaluated by
// doublestar.PathMatch. A pattern for which PathMatch returns an error is treated as a non-match and skipped.
// With no patterns the result is always false.
func matchesAnyGlob(name string, globs ...string) bool {
	for i := range globs {
		ok, err := doublestar.PathMatch(globs[i], name)
		if err != nil {
			// a pattern that cannot be evaluated never counts as a match
			continue
		}
		if ok {
			return true
		}
	}
	return false
}