add tests around MatchNamedCaptureGroups + rename

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-04-07 15:47:43 -04:00
parent 66ebe49a04
commit 5743e32e02
No known key found for this signature in database
GPG key ID: 5CB45AE22BAB7EA7
7 changed files with 113 additions and 19 deletions

View file

@ -1,15 +0,0 @@
package internal
import "regexp"
// MatchCaptureGroups takes a regular expression and string and returns the capture group results of the first match
// in a map keyed by capture group name.
//
// Unnamed capture groups have an empty name, so their values are stored under the "" key (when there are several
// unnamed groups the last one wins). When the expression does not match, an empty (non-nil) map is returned.
func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)

	match := regEx.FindStringSubmatch(str)
	if match == nil {
		// no match: nothing to report, but keep returning a usable map
		return results
	}

	for i, name := range regEx.SubexpNames() {
		// index 0 is the whole match rather than a capture group; the upper bound must be exclusive so that
		// match[i] is always a valid index.
		if i > 0 && i < len(match) {
			results[name] = match[i]
		}
	}
	return results
}

39
internal/regex_helpers.go Normal file
View file

@ -0,0 +1,39 @@
package internal
import "regexp"
// MatchNamedCaptureGroups takes a regular expression and string and returns the named capture group results in a map
// keyed by group name. Unnamed capture groups are ignored.
//
// All matches of regEx within content are considered in order, and the results of the first match in which at least
// one named group captured a non-empty value are returned. If no match captured a non-empty value, the results of the
// last match are returned (every value is then the empty string). A nil map is returned when regEx does not match
// content at all, or when regEx has no named capture groups.
func MatchNamedCaptureGroups(regEx *regexp.Regexp, content string) map[string]string {
	// note: we are looking across all matches and stopping on the first non-empty match. Why? Take the following example:
	// input: "cool something to match against" pattern: `((?P<name>match) (?P<version>against))?`. Since the pattern is
	// encapsulated in an optional capture group, there will be results for each character, but the results will match
	// on nothing. The only "true" match will be at the end ("match against").
	allMatches := regEx.FindAllStringSubmatch(content, -1)

	// the group names are a property of the expression, not of a particular match, so look them up only once
	names := regEx.SubexpNames()

	for matchIdx, match := range allMatches {
		results := namedGroupValues(names, match)
		if len(results) == 0 {
			// the expression has no named groups, so there is nothing to report for this match
			continue
		}

		// return the first non-empty result, or if this is the last match, the results that were found.
		isLastMatch := matchIdx == len(allMatches)-1
		if hasNonEmptyValue(results) || isLastMatch {
			return results
		}
	}
	return nil
}

// namedGroupValues pairs each non-empty capture group name with the value at the same index of a single submatch
// result, accepting empty values but skipping groups with no name.
func namedGroupValues(names []string, match []string) map[string]string {
	results := make(map[string]string)
	for nameIdx, name := range names {
		// the bound is exclusive so that match[nameIdx] is always a valid index
		if name != "" && nameIdx < len(match) {
			results[name] = match[nameIdx]
		}
	}
	return results
}

// hasNonEmptyValue reports whether at least one value in the given map is not the empty string.
func hasNonEmptyValue(results map[string]string) bool {
	for _, value := range results {
		if value != "" {
			return true
		}
	}
	return false
}

View file

@ -0,0 +1,70 @@
package internal
import (
"regexp"
"testing"
"github.com/stretchr/testify/assert"
)
// TestMatchCaptureGroups runs MatchNamedCaptureGroups against a table of pattern/input pairs and checks that the
// returned map of named capture groups equals the expected one.
func TestMatchCaptureGroups(t *testing.T) {
	type testCase struct {
		name    string
		input   string
		pattern string
		want    map[string]string
	}

	cases := []testCase{
		// plain pattern with two named groups
		{
			name:    "go-case",
			input:   "match this thing",
			pattern: `(?P<name>match).*(?P<version>thing)`,
			want:    map[string]string{"name": "match", "version": "thing"},
		},
		// the input holds two candidate matches; the first one is expected
		{
			name:    "only matches the first instance",
			input:   "match this thing batch another think",
			pattern: `(?P<name>[mb]atch).*?(?P<version>thin[gk])`,
			want:    map[string]string{"name": "match", "version": "thing"},
		},
		// named groups wrapped in an unnamed group
		{
			name:    "nested capture groups",
			input:   "cool something to match against",
			pattern: `((?P<name>match) (?P<version>against))`,
			want:    map[string]string{"name": "match", "version": "against"},
		},
		// the wrapping group is optional, so the pattern can also match nothing
		{
			name:    "nested optional capture groups",
			input:   "cool something to match against",
			pattern: `((?P<name>match) (?P<version>against))?`,
			want:    map[string]string{"name": "match", "version": "against"},
		},
		// optional wrapping group preceded by a lazy prefix, with two candidates in the input
		{
			name:    "nested optional capture groups with larger match",
			input:   "cool something to match against match never",
			pattern: `.*?((?P<name>match) (?P<version>(against|never)))?`,
			want:    map[string]string{"name": "match", "version": "against"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			compiled := regexp.MustCompile(tc.pattern)
			got := MatchNamedCaptureGroups(compiled, tc.input)
			assert.Equal(t, tc.want, got)
		})
	}
}

View file

@ -21,7 +21,7 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
for scanner.Scan() {
line := scanner.Text()
matchesByGroup := internal.MatchCaptureGroups(licensePattern, line)
matchesByGroup := internal.MatchNamedCaptureGroups(licensePattern, line)
if len(matchesByGroup) > 0 {
candidate, ok := matchesByGroup["license"]
if !ok {

View file

@ -145,7 +145,7 @@ func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
// of the "<name>" form, then return name and nil
func extractSourceVersion(source string) (string, string) {
	// special handling for the Source field since it has formatted data. Indexing the result is safe even when
	// nothing was captured: a missing key (or a nil map) yields the empty string.
	match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
	return match["name"], match["version"]
}

View file

@ -63,7 +63,7 @@ func (a *Author) UnmarshalJSON(b []byte) error {
}
} else {
// parse out "name <email> (url)" into an Author struct
fields = internal.MatchCaptureGroups(authorPattern, authorStr)
fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
}
// translate the map into a structure

View file

@ -77,7 +77,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
}
for field, pattern := range patterns {
matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine)
matchMap := internal.MatchNamedCaptureGroups(pattern, sanitizedLine)
if value := matchMap[field]; value != "" {
if postProcessor := postProcessors[field]; postProcessor != nil {
fields[field] = postProcessor(value)