Add Analyzers interface for HuggingFace (#3140)

* implemented analyzer interface with data models for HuggingFace

* correct test for huggingface due to new addition of key in detection result.

---------

Co-authored-by: Abdul Basit <abasit@folio3.com>
This commit is contained in:
Abdul Basit 2024-08-02 20:20:11 +05:00 committed by GitHub
parent f927076483
commit 04a13385a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 229 additions and 3 deletions

View file

@ -30,11 +30,234 @@ type Analyzer struct {
func (Analyzer) Type() analyzerpb.AnalyzerType { return analyzerpb.AnalyzerType_HuggingFace }
func (a Analyzer) Analyze(_ context.Context, credInfo map[string]string) (*analyzers.AnalyzerResult, error) {
_, err := AnalyzePermissions(a.Cfg, credInfo["key"])
key, ok := credInfo["key"]
if !ok || key == "" {
return nil, fmt.Errorf("key not found in credentialInfo")
}
info, err := AnalyzePermissions(a.Cfg, key)
if err != nil {
return nil, err
}
return nil, fmt.Errorf("not implemented")
return secretInfoToAnalyzerResult(info), nil
}
func bakeUnboundedResources(tokenJSON HFTokenJSON) []analyzers.Resource {
unboundedResources := make([]analyzers.Resource, len(tokenJSON.Orgs))
for idx, org := range tokenJSON.Orgs {
unboundedResources[idx] = analyzers.Resource{
Name: org.Name,
Type: "organization",
Metadata: map[string]interface{}{
"role": org.Role,
"is_enterprise": org.IsEnterprise,
},
}
}
return unboundedResources
}
func bakeUnfineGrainedBindings(allModels []Model, tokenJSON HFTokenJSON) []analyzers.Binding {
bindings := make([]analyzers.Binding, len(allModels))
for idx, model := range allModels {
// Add Read Privs to All Models
modelResource := analyzers.Resource{
Name: model.Name,
Type: "model",
Metadata: map[string]interface{}{
"private": model.Private,
},
}
// means both read & write permission for the model
accessLevel := string(analyzers.READ)
if tokenJSON.Auth.AccessToken.Type == WRITE {
accessLevel = string(analyzers.WRITE)
}
bindings[idx] = analyzers.Binding{
Resource: modelResource,
Permission: analyzers.Permission{
Value: string(accessLevel),
},
}
}
return bindings
}
// finegrained scopes are grouped by org, user or model.
func bakefineGrainedBindings(allModels []Model, tokenJSON HFTokenJSON) []analyzers.Binding {
// this section will extract the relevant permissions for each entity and store them in a map
var nameToPermissions = make(map[string]analyzers.Permission)
for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Scoped {
privs := analyzers.Permission{
Value: string(analyzers.NONE),
}
for _, perm := range permission.Permissions {
if perm == "repo.content.read" {
privs.Value = string(analyzers.READ)
} else if perm == "repo.write" {
privs.Value = string(analyzers.WRITE)
}
}
if permission.Entity.Type == "user" || permission.Entity.Type == "org" {
nameToPermissions[permission.Entity.Name] = privs
} else if permission.Entity.Type == "model" {
nameToPermissions[modelNameLookup(allModels, permission.Entity.ID)] = privs
}
}
bindings := make([]analyzers.Binding, len(allModels))
for idx, model := range allModels {
// Add Read Privs to All Models
modelResource := analyzers.Resource{
Name: model.Name,
Type: "model",
Metadata: map[string]interface{}{
"private": model.Private,
},
}
var perm analyzers.Permission
// get username/orgname for each model and apply those permissions
modelUsername := strings.Split(model.Name, "/")[0]
if permissions, ok := nameToPermissions[modelUsername]; ok {
perm = permissions
}
// override model permissions with repo-specific permissions
if permissions, ok := nameToPermissions[model.Name]; ok {
perm = permissions
}
bindings[idx] = analyzers.Binding{
Resource: modelResource,
Permission: perm,
}
}
return bindings
}
func bakeOrganizationBindings(tokenJSON HFTokenJSON) []analyzers.Binding {
// check if there are any org permissions
// if so, save them as a map. Only need to do this once
// even if multiple orgs b/c as of 6/6/24, users can only define one set of scopes
// for all orgs referenced on an access token
orgPermissions := map[string]struct{}{}
var orgResource *analyzers.Resource = nil
for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Scoped {
if permission.Entity.Type == "org" {
orgResource = &analyzers.Resource{
Name: permission.Entity.Name,
Type: "organization",
}
for _, perm := range permission.Permissions {
orgPermissions[perm] = struct{}{}
}
break
}
}
bindings := make([]analyzers.Binding, 0)
// check if there are any org permissions
if orgResource == nil {
return bindings
}
for _, permission := range org_scopes_order {
for key, value := range org_scopes[permission] {
if _, ok := orgPermissions[key]; ok {
bindings = append(bindings, analyzers.Binding{
Resource: *orgResource,
Permission: analyzers.Permission{
Value: value,
},
})
}
}
}
return bindings
}
func bakeUserBindings(tokenJSON HFTokenJSON) []analyzers.Binding {
bindings := make([]analyzers.Binding, 0)
// build a map of all user permissions
users := map[string]struct{}{}
userPermissions := map[string]struct{}{}
for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Scoped {
if permission.Entity.Type == "user" {
users[permission.Entity.Name] = struct{}{}
for _, perm := range permission.Permissions {
userPermissions[perm] = struct{}{}
}
}
}
// global permissions only apply to user tokens as of 6/6/24
// but there would be a naming collision in the scopes document
// so we prepend "global." to the key and then add to the map
for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Global {
userPermissions["global."+permission] = struct{}{}
}
// check if there are any user permissions
if len(userPermissions) == 0 {
return bindings
}
userResource := analyzers.Resource{
Name: tokenJSON.Username,
Type: "user",
}
for _, permission := range user_scopes_order {
for key, value := range user_scopes[permission] {
if _, ok := userPermissions[key]; ok {
bindings = append(bindings, analyzers.Binding{
Resource: userResource,
Permission: analyzers.Permission{
Value: value,
},
})
}
}
}
return bindings
}
func secretInfoToAnalyzerResult(info *SecretInfo) *analyzers.AnalyzerResult {
if info == nil {
return nil
}
result := analyzers.AnalyzerResult{
AnalyzerType: analyzerpb.AnalyzerType_HuggingFace,
Metadata: map[string]interface{}{
"username": info.Token.Username,
"name": info.Token.Name,
"token_name": info.Token.Auth.AccessToken.Name,
"token_type": info.Token.Auth.AccessToken.Type,
},
}
if len(info.Token.Orgs) > 0 {
result.UnboundedResources = bakeUnboundedResources(info.Token)
}
result.Bindings = make([]analyzers.Binding, 0)
if info.Token.Auth.AccessToken.Type != FINEGRAINED {
result.Bindings = append(result.Bindings, bakeUnfineGrainedBindings(info.Models, info.Token)...)
}
result.Bindings = append(result.Bindings, bakefineGrainedBindings(info.Models, info.Token)...)
if info.Token.Auth.AccessToken.Type == FINEGRAINED {
result.Bindings = append(result.Bindings, bakeOrganizationBindings(info.Token)...)
result.Bindings = append(result.Bindings, bakeUserBindings(info.Token)...)
}
return &result
}
// HFTokenJSON is the struct for the HF /whoami-v2 API JSON response

View file

@ -4,10 +4,11 @@ import (
"context"
"encoding/json"
"fmt"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"
regexp "github.com/wasilibs/go-re2"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
@ -54,6 +55,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1.Verified = isVerified
s1.ExtraData = extraData
s1.SetVerificationError(verificationErr, resMatch)
s1.AnalysisInfo = map[string]string{"key": resMatch}
}
results = append(results, s1)

View file

@ -191,6 +191,7 @@ func TestHuggingface_FromChunk(t *testing.T) {
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
t.Fatalf(" wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
}
got[i].AnalysisInfo = nil
}
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError")
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {