Add huggingface tui config (#3060)

* add huggingface tui config

* update readme

* undo auto format
This commit is contained in:
Hon 2024-09-03 12:54:39 -07:00 committed by GitHub
parent 15faaba61c
commit f52d8e872d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 107 additions and 2 deletions

View file

@ -309,11 +309,33 @@ The following command will enumerate deleted and hidden commits on a GitHub repo
trufflehog github-experimental --repo https://github.com/<USER>/<REPO>.git --object-discovery
```
In addition to the normal TruffleHog output, the `--object-discovery` flag creates two files in a new `$HOME/.trufflehog` directory: `valid_hidden.txt` and `invalid.txt`. These are used to track state during commit enumeration, as well as to provide users with a complete list of all hidden and deleted commits (`valid_hidden.txt`). If you'd like to automatically remove these files after scanning, please add the flag `--delete-cached-data`.
In addition to the normal TruffleHog output, the `--object-discovery` flag creates two files in a new `$HOME/.trufflehog` directory: `valid_hidden.txt` and `invalid.txt`. These are used to track state during commit enumeration, as well as to provide users with a complete list of all hidden and deleted commits (`valid_hidden.txt`). If you'd like to automatically remove these files after scanning, please add the flag `--delete-cached-data`.
**Note**: Enumerating all valid commits on a repository using this method takes between 20 minutes and a few hours, depending on the size of your repository. We added a progress bar to keep you updated on how long the enumeration will take. The actual secret scanning runs extremely fast.
For more information on Cross Fork Object References, please [read our blog post](https://trufflesecurity.com/blog/anyone-can-access-deleted-and-private-repo-data-github).
For more information on Cross Fork Object References, please [read our blog post](https://trufflesecurity.com/blog/anyone-can-access-deleted-and-private-repo-data-github).
## 16. Scan Hugging Face
### Scan a Hugging Face Model, Dataset or Space
```bash
trufflehog huggingface --model <model_id> --space <space_id> --dataset <dataset_id>
```
### Scan all Models, Datasets and Spaces belonging to a Hugging Face Organization or User
```bash
trufflehog huggingface --org <orgname> --user <username>
```
Optionally, when scanning an organization or user, you can skip an entire class of resources with `--skip-models`, `--skip-datasets`, or `--skip-spaces`, or skip a particular resource with `--ignore-models <model_id>`, `--ignore-datasets <dataset_id>`, or `--ignore-spaces <space_id>`.
### Scan Discussion and PR Comments
```bash
trufflehog huggingface --model <model_id> --include-discussions --include-prs
```
# :question: FAQ

View file

@ -65,6 +65,7 @@ func New(c common.Common) *SourceSelect {
OssItem("Git", "Scan git repositories."),
OssItem("GitHub", "Scan GitHub repositories and/or organizations."),
OssItem("Filesystem", "Scan your filesystem by selecting what directories to scan."),
OssItem("Hugging Face", "Scan Hugging Face, an AI/ML community."),
OssItem("Jenkins", "Scan Jenkins, a CI/CD platform. (Recently open-sourced from enterprise!)"),
OssItem("Elasticsearch", "Scan your Elasticsearch cluster or Elastic Cloud instance."),
OssItem("Postman", "Scan a collection, workspace, or environment from Postman, the API platform."),

View file

@ -0,0 +1,77 @@
package huggingface
import (
"strings"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/components/textinputs"
)
// huggingFaceCmdModel is the command model for the Hugging Face source. It
// embeds textinputs.Model and adds the Cmd and Summary methods defined in
// this file.
type huggingFaceCmdModel struct {
textinputs.Model
}
// GetNote returns the hint text that tells the user which kinds of Hugging
// Face targets can be entered for a scan.
func GetNote() string {
	const note = "Please enter the organization, user, model, space, or dataset you would like to scan."
	return note
}
// GetFields builds the text-input model for the Hugging Face source. It
// declares five inputs, in this order: org, user, model, space, dataset.
// Every input is marked as not required.
func GetFields() huggingFaceCmdModel {
	// The slice order is the order handed to textinputs.New.
	configs := []textinputs.InputConfig{
		{
			Label:    "Organization",
			Key:      "org",
			Required: false,
			Help:     "Hugging Face organization name. This will scan all models, datasets, and spaces belonging to the organization.",
		},
		{
			Label:    "Username",
			Key:      "user",
			Required: false,
			Help:     "Hugging Face user. This will scan all models, datasets, and spaces belonging to the user.",
		},
		{
			Label:    "Model",
			Key:      "model",
			Required: false,
			Help:     "Hugging Face model. Example: org/model_name or user/model_name",
		},
		{
			Label:    "Space",
			Key:      "space",
			Required: false,
			Help:     "Hugging Face space. Example: org/space_name or user/space_name.",
		},
		{
			Label:    "Dataset",
			Key:      "dataset",
			Required: false,
			Help:     "Hugging Face dataset. Example: org/dataset_name or user/dataset_name.",
		},
	}

	return huggingFaceCmdModel{textinputs.New(configs)}
}
// Cmd assembles the trufflehog command line for the values entered in the
// form. It starts with "trufflehog huggingface" and appends one
// "--<key>=<value>" argument for each of org, user, model, space, and dataset
// (in that order) whose input is present and non-empty. The parts are joined
// with single spaces.
func (m huggingFaceCmdModel) Cmd() string {
	parts := []string{"trufflehog", "huggingface"}

	values := m.GetInputs()
	for _, flag := range []string{"org", "user", "model", "space", "dataset"} {
		// Only emit a flag when the user actually supplied a value for it.
		if input, found := values[flag]; found && input.Value != "" {
			parts = append(parts, "--"+flag+"="+input.Value)
		}
	}

	return strings.Join(parts, " ")
}
// Summary returns the result of common.SummarizeSource for the model's
// current inputs and labels, restricted to the keys org, user, model, space,
// and dataset in that order.
func (m huggingFaceCmdModel) Summary() string {
	values, names := m.GetInputs(), m.GetLabels()
	order := []string{"org", "user", "model", "space", "dataset"}
	return common.SummarizeSource(order, values, names)
}

View file

@ -12,6 +12,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/git"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/github"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/gitlab"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/huggingface"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/jenkins"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/postman"
"github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/s3"
@ -27,6 +28,8 @@ func GetSourceNotes(sourceName string) string {
return postman.GetNote()
case "elasticsearch":
return elasticsearch.GetNote()
case "huggingface":
return huggingface.GetNote()
case "jenkins":
return jenkins.GetNote()
@ -63,6 +66,8 @@ func GetSourceFields(sourceName string) CmdModel {
return github.GetFields()
case "gitlab":
return gitlab.GetFields()
case "hugging face":
return huggingface.GetFields()
case "jenkins":
return jenkins.GetFields()
case "postman":