Add in-depth quality gate checks (#949)

* add in-depth quality gate checks

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add quality tests to PR checks

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
Alex Goodman 2022-10-05 16:26:26 -04:00 committed by GitHub
parent 7ad60ce410
commit d4587ddeec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 653 additions and 0 deletions


@@ -9,6 +9,7 @@ on:
env:
GO_VERSION: "1.18.x"
GO_STABLE_VERSION: true
PYTHON_VERSION: "3.10"
jobs:
Static-Analysis:
@@ -100,6 +101,58 @@ jobs:
name: unit-test-results
path: test/results/**/*
Quality-Test:
# Note: changing this job name requires making the same update in the .github/workflows/release.yaml pipeline
name: "Quality tests"
runs-on: ubuntu-20.04
steps:
- uses: actions/setup-go@v2
with:
go-version: ${{ env.GO_VERSION }}
stable: ${{ env.GO_STABLE_VERSION }}
- uses: actions/setup-python@v2
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/checkout@v2
with:
submodules: true
- name: Restore tool cache
id: tool-cache
uses: actions/cache@v2.1.3
with:
path: ${{ github.workspace }}/.tmp
key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }}
- name: Restore go cache
id: go-cache
uses: actions/cache@v2.1.3
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-${{ env.GO_VERSION }}-
- name: Restore python cache
id: python-cache
uses: actions/cache@v2.1.3
with:
path: |
test/quality/venv
test/quality/vulnerability-match-labels/venv
key: ${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-${{ hashFiles('**/test/quality/**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-python-${{ env.PYTHON_VERSION }}-
- name: (cache-miss) Bootstrap all project dependencies
if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true'
run: make bootstrap
- name: Run quality tests
run: make quality
Integration-Test:
# Note: changing this job name requires making the same update in the .github/workflows/release.yaml pipeline
name: "Integration tests"

.gitmodules (vendored, new file, +4)

@@ -0,0 +1,4 @@
[submodule "test/quality/vulnerability-match-labels"]
path = test/quality/vulnerability-match-labels
url = git@github.com:anchore/vulnerability-match-labels.git
branch = main


@@ -168,6 +168,11 @@ unit: ## Run unit tests (with coverage)
@echo "Coverage: $$(cat $(COVER_TOTAL))"
@if [ $$(echo "$$(cat $(COVER_TOTAL)) >= $(COVERAGE_THRESHOLD)" | bc -l) -ne 1 ]; then echo "$(RED)$(BOLD)Failed coverage quality gate (> $(COVERAGE_THRESHOLD)%)$(RESET)" && false; fi
.PHONY: quality
quality: ## Run quality tests
$(call title,Running quality tests)
cd test/quality && make
# note: this is used by CI to determine if the install test fixture cache (docker image tars) should be busted
install-fingerprint:
cd test/install && \

test/quality/.gitignore (vendored, new file, +7)

@@ -0,0 +1,7 @@
venv
.yardstick/tools
.yardstick/result
stage
pull
migrate.py
.oras-cache


@@ -0,0 +1,45 @@
x-ref:
# note: always reference images with BOTH a tag and a digest
images: &images
- docker.io/cloudbees/cloudbees-core-mm:2.277.3.1@sha256:4c564f473d38f23da1caa48c4ef53b958ef03d279232007ad3319b1f38584bdb
- docker.io/anchore/test_images:grype-quality-node-d89207b@sha256:f56164678054e5eb59ab838367373a49df723b324617b1ba6de775749d7f91d4
- docker.io/anchore/test_images:grype-quality-python-d89207b@sha256:b2b58a55c0b03c1626d2aaae2add9832208b02124dda7b7b41811e14f0fb272c
- docker.io/anchore/test_images:grype-quality-java-d89207b@sha256:b3534fc2e37943136d5b54e3a58b55d4ccd4363d926cf7aa5bf55a524cf8275b
- docker.io/anchore/test_images:grype-quality-golang-d89207b@sha256:7536ee345532f674ec9e448e3768db4e546c48220ba2b6ec9bc9cfbfb3b7b74a
- docker.io/anchore/test_images:grype-quality-ruby-d89207b@sha256:1a5a5f870924e88a6f0f2b8089cf276ef0a79b5244a052cdfe4a47bb9e5a2c10
- docker.io/alpine:3.2@sha256:ddac200f3ebc9902fb8cfcd599f41feb2151f1118929da21bcef57dc276975f9
- docker.io/centos:6@sha256:3688aa867eb84332460e172b9250c9c198fdfd8d987605fd53f246f498c60bcf
- docker.io/ubuntu:16.10@sha256:8dc9652808dc091400d7d5983949043a9f9c7132b15c14814275d25f94bca18a
- docker.io/almalinux:8@sha256:cd49d7250ed7bb194d502d8a3e50bd775055ca275d1d9c2785aea72b890afe6a
- docker.io/rockylinux:8@sha256:72afc2e1a20c9ddf56a81c51148ebcbe927c0a879849efe813bee77d69df1dd8
- docker.io/oraclelinux:6@sha256:a06327c0f1d18d753f2a60bb17864c84a850bb6dcbcf5946dd1a8123f6e75495
- docker.io/debian:7@sha256:81e88820a7759038ffa61cff59dfcc12d3772c3a2e75b7cfe963c952da2ad264
- docker.io/busybox:1.28.1@sha256:2107a35b58593c58ec5f4e8f2c4a70d195321078aebfadfbfb223a2ff4a4ed21
- docker.io/amazonlinux:2@sha256:1301cc9f889f21dc45733df9e58034ac1c318202b4b0f0a08d88b3fdc03004de
- registry.access.redhat.com/ubi8@sha256:68fecea0d255ee253acbf0c860eaebb7017ef5ef007c25bee9eeffd29ce85b29
# note: new vulnerabilities are added all of the time; instead of trying to keep up, it's easier to ignore newer entries.
# This approach helps tremendously with keeping the analysis relatively stable.
default_max_year: 2020
result-sets:
pr_vs_latest_via_sbom:
description: "latest released grype vs grype from the current build (via SBOM ingestion)"
matrix:
images: *images
tools:
- name: syft
# note: we want to use a fixed version of syft for capturing all results (NOT "latest")
version: v0.54.0
produces: SBOM
refresh: false
- name: grype
version: git:current-commit
takes: SBOM
- name: grype
version: latest
takes: SBOM


@@ -0,0 +1 @@
../vulnerability-match-labels/labels

test/quality/Makefile (new file, +68)

@@ -0,0 +1,68 @@
SBOM_STORE_TAG = md5-$(shell md5sum .yardstick.yaml | cut -d' ' -f1)
SBOM_STORE_IMAGE = ghcr.io/anchore/grype/quality-test-sbom-store:$(SBOM_STORE_TAG)
ACTIVATE_VENV = . venv/bin/activate &&
YARDSTICK = $(ACTIVATE_VENV) yardstick -v
YARDSTICK_RESULT_DIR = .yardstick/result
YARDSTICK_LABELS_DIR = .yardstick/labels
VULNERABILITY_LABELS = ./vulnerability-labels
RESULT_SET = pr_vs_latest_via_sbom
# formatting variables
BOLD := $(shell tput -T linux bold)
PURPLE := $(shell tput -T linux setaf 5)
GREEN := $(shell tput -T linux setaf 2)
CYAN := $(shell tput -T linux setaf 6)
RED := $(shell tput -T linux setaf 1)
RESET := $(shell tput -T linux sgr0)
TITLE := $(BOLD)$(PURPLE)
SUCCESS := $(BOLD)$(GREEN)
.PHONY: all
all: capture validate ## Fetch or capture all data and run all quality checks
.PHONY: validate
validate: venv $(VULNERABILITY_LABELS) ## Run all quality checks against already collected data
$(ACTIVATE_VENV) ./gate.py
.PHONY: capture
capture: sboms vulns ## Collect and store all syft and grype results
.PHONY: vulns
vulns: venv ## Collect and store all grype results
$(YARDSTICK) -v result capture -r $(RESULT_SET)
.PHONY: sboms
sboms: $(YARDSTICK_RESULT_DIR) venv clear-results ## Collect and store all syft results (deletes all existing results)
bash -c "make download-sboms || ($(YARDSTICK) -v result capture -r $(RESULT_SET) --only-producers)"
.PHONY: download-sboms
download-sboms:
cd vulnerability-match-labels && make venv
bash -c "export ORAS_CACHE=$(shell pwd)/.oras-cache && make venv && . vulnerability-match-labels/venv/bin/activate && ./vulnerability-match-labels/sboms.py download -r $(RESULT_SET)"
venv: venv/touchfile
venv/touchfile: requirements.txt
test -d venv || python3 -m venv venv
$(ACTIVATE_VENV) pip install -Ur requirements.txt
touch venv/touchfile
$(YARDSTICK_RESULT_DIR):
mkdir -p $(YARDSTICK_RESULT_DIR)
$(VULNERABILITY_LABELS):
git submodule update vulnerability-match-labels
.PHONY: clear-results
clear-results: venv ## Clear all existing yardstick results
$(YARDSTICK) result clear
.PHONY: clean
clean: clear-results ## Clear all existing yardstick results and delete python environment
rm -rf venv
find -iname "*.pyc" -delete
help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}'

test/quality/README.md (new file, +140)

@@ -0,0 +1,140 @@
# Match quality testing
This form of testing compares the results from various releases of grype using a
static set of reference container images. The kinds of comparisons made are:
1) "relative": find the vulnerability matching differences between both tools
for a given image. This helps identify when a change has occurred in matching
behavior and where the changes are.
2) "against labels": pair each tool results for an image with ground truth. This
helps identify how well the matching behavior is performing (did it get
better or worse).
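To make the two comparison kinds concrete, here is a minimal, illustrative sketch that reduces each tool's output to a set of `(package, version, vulnerability)` tuples. The specific packages, CVE IDs, and labels are made up for illustration; yardstick's real data model is richer than plain tuples.
```
# illustrative only: two tools' matches for the same image, as (package, version, CVE) tuples
current_build = {
    ("openssl", "1.1.1k", "CVE-2021-3711"),
    ("log4j-core", "2.14.1", "CVE-2021-44228"),
}
latest_release = {
    ("openssl", "1.1.1k", "CVE-2021-3711"),
    ("openssl", "1.1.1k", "CVE-2020-0000"),  # placeholder CVE ID
}

# 1) "relative": what does each tool find that the other does not?
print("only in current build:", current_build - latest_release)
print("only in latest release:", latest_release - current_build)

# 2) "against labels": score each tool against ground-truth labels (TP or FP per match)
labels = {
    ("openssl", "1.1.1k", "CVE-2021-3711"): "TP",
    ("openssl", "1.1.1k", "CVE-2020-0000"): "FP",
    ("log4j-core", "2.14.1", "CVE-2021-44228"): "TP",
}
for name, matches in [("current build", current_build), ("latest release", latest_release)]:
    tp = sum(1 for m in matches if labels.get(m) == "TP")
    fp = sum(1 for m in matches if labels.get(m) == "FP")
    print(f"{name}: TP={tp} FP={fp}")
```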
## Getting started
To capture raw tool output and store into the local `.yardstick` directory for
further analysis:
```
make capture
```
To analyze the tool output and evaluate a pass/fail result:
```
make validate
```
A pass/fail result is shown in the output, with any reasons for failure listed
explicitly.
## What is the quality gate criteria
The label comparison results are used to determine a pass/fail result,
specifically with the following criteria:
- fail when current grype F1 score drops below last grype release F1 score (or
F1 score is indeterminate)
- fail when the indeterminate matches % > 10% in the current grype results
- fail when there is a rise in FNs relative to the results from the last grype
release
- otherwise, pass
F1 score is the primary way that tool matching performance is characterized. F1
score combines the true positive (TP), false positive (FP), and false negative
(FN) counts into a single metric between 0 and 1. Ideally the F1 score for an
image-tool pair should be 1. F1 score is a good way to summarize the matching
performance but does not explain why the matching performance is what it is.
Indeterminate matches are matches from results that could not be paired with a
label (TP or FP). This could also mean that multiple conflicting labels were
found for a single match. The more indeterminate matches there are, the less
confident you can be about the F1 score. Ideally there should be 0 indeterminate
matches, but this is difficult to achieve since vulnerability data is constantly
changing.
False negatives represent matches that should have been made by the tool but
were missed. We should always make certain that this value does not increase
between releases of grype.
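The following minimal sketch shows how these criteria can be evaluated from per-image label-comparison counts. The counts are hypothetical and this is not the gate implementation itself (see `gate.py`):
```
def f1_score(tp: int, fp: int, fn: int) -> float:
    # F1 = 2*TP / (2*TP + FP + FN); 1.0 is a perfect score
    denominator = 2 * tp + fp + fn
    return (2 * tp / denominator) if denominator else 0.0

# hypothetical counts for one image
current = {"tp": 40, "fp": 5, "fn": 3, "indeterminate_percent": 4.0}
latest_release = {"tp": 40, "fp": 6, "fn": 3}

reasons = []
if f1_score(current["tp"], current["fp"], current["fn"]) < f1_score(latest_release["tp"], latest_release["fp"], latest_release["fn"]):
    reasons.append("current F1 score dropped below the latest release F1 score")
if current["indeterminate_percent"] > 10:
    reasons.append("indeterminate matches exceed 10% of current results")
if current["fn"] > latest_release["fn"]:
    reasons.append("false negatives increased relative to the latest release")

print("PASS" if not reasons else "FAIL: " + "; ".join(reasons))
```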
## Assumptions
1. **Comparing vulnerability results taken at different times is invalid**.
We leverage the yardstick result-set feature to capture all vulnerability
results at one time for a specific image and tool set. Why? If we use grype
at version `a` on Monday and grype at version `b` on Tuesday and then compare
the results, any differences found will not be immediately explainable. It is
entirely possible that the vulnerability database from the run of `b` simply
had more up-to-date information; if `grype@a` were run at the same time (on
Tuesday), that explanation could be almost entirely ruled out.
2. **Comparing vulnerability results across images with different digests is invalid**.
It may be very tempting to compare vulnerability results for
`alpine:3.2` from Monday and `alpine:3.2` from Tuesday to see if there are
any changes. However, this is potentially inaccurate: the image references
use the same tag, but the publisher may have pushed a new image with
differing content. Any change could lead to different vulnerability matching
results, but we are only interested in match differences that are due to
actionable reasons (problems in grype matcher logic or in the SBOM input
data fed to the matchers).
## Approach
Vulnerability matching has essentially two inputs:
- the packages that were found in the scanned artifact
- the vulnerability data from upstream providers (e.g. NVD, GHSA, etc.)
These are both moving targets!
We may implement more catalogers in syft that surface more packages over time
(for the same artifact scanned), and the world is continually finding and
reporting new vulnerabilities. The more moving parts there are in this form of
testing, the harder it is to come to a conclusion about the actual quality of
the output over time.
To keep the value of this testing from eroding over time, we've turned as many
moving targets into fixed targets as possible:
- Vulnerability results beyond a particular year are ignored (the current config
allows for <= 2020; see the sketch after this list). Though retroactive CVEs are
still created, this helps a lot in keeping vulnerability results relatively stable.
- SBOMs are used as input into grype instead of the raw container images. This
allows the artifacts under test to remain truly fixed and saves a lot of time
when capturing grype results (as the container image is no longer needed
during analysis).
- For the captured SBOMs, container images must be referenced by digest, not
just by tag. If we update a tool version (say syft), we want to be certain
that we are scanning the exact same artifact later when we re-run the
analysis.
- Versions of the tools used are fixed to a specific `major.minor.patch` release.
This allows us to account for capability differences between tool runs.
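As an example of the year ceiling in particular, a match can be kept or dropped based on the year embedded in its CVE identifier. This is only a sketch; `keep_match` is a hypothetical helper and is not how gate.py or yardstick implement the filter:
```
import re

def keep_match(vuln_id: str, max_year: int = 2020) -> bool:
    # drop CVEs newer than the ceiling; identifiers without a year (e.g. GHSA) are kept
    m = re.match(r"CVE-(\d{4})-\d+$", vuln_id)
    return int(m.group(1)) <= max_year if m else True

matches = ["CVE-2019-1234", "CVE-2022-0001", "GHSA-aaaa-bbbb-cccc"]
print([v for v in matches if keep_match(v)])  # CVE-2022-0001 is dropped
```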
To reduce maintenance effort of this comparison over time there are a few things
to keep in mind:
- Once an image is labeled (at a specific digest) the image digest should be
considered immutable (never updated). Why? It takes a lot of effort to label
images and there are no "clearly safe" assumptions that can be made when it
comes to migrating labels from one image to another no matter how "similar"
the images may be. There is also no value in updating the image; these images
are not being executed and their only purpose is to survey the matching
performance of grype. In the philosophy of "maximizing fixed points" it
doesn't make sense to change these assets. Over time it may be that we remove
assets that are no longer useful for comparison, but this should rarely be
done.
- Consider not changing the CVE year max-ceiling (currently set to 2020).
Pushing this ceiling will likely raise the number of unlabeled matches
significantly for all images. Only bump this ceiling if all possible matches
are labeled.

test/quality/gate.py (new executable file, +326)

@@ -0,0 +1,326 @@
#!/usr/bin/env python3
import logging
import os
import re
import subprocess
import sys
from typing import Any, Optional
import click
from tabulate import tabulate
from dataclasses import dataclass, InitVar, field
import yardstick
from yardstick import store, comparison, artifact, arrange
from yardstick.cli import display, config
# see the .yardstick.yaml configuration for details
default_result_set = "pr_vs_latest_via_sbom"
yardstick.utils.grype_db.raise_on_failure(False)
@dataclass
class Gate:
label_comparisons: InitVar[Optional[list[comparison.AgainstLabels]]]
label_comparison_stats: InitVar[Optional[comparison.ImageToolLabelStats]]
reasons: list[str] = field(default_factory=list)
def __post_init__(self, label_comparisons: Optional[list[comparison.AgainstLabels]], label_comparison_stats: Optional[comparison.ImageToolLabelStats]):
if not label_comparisons and not label_comparison_stats:
return
reasons = []
# - fail when current F1 score drops below last release F1 score (or F1 score is indeterminate)
# - fail when indeterminate % > 10%
# - fail when there is a rise in FNs
latest_release_tool, current_tool = guess_tool_orientation(label_comparison_stats.tools)
latest_release_comparisons_by_image = {comp.config.image: comp for comp in label_comparisons if comp.config.tool == latest_release_tool }
current_comparisons_by_image = {comp.config.image: comp for comp in label_comparisons if comp.config.tool == current_tool }
for image, comp in current_comparisons_by_image.items():
latest_f1_score = latest_release_comparisons_by_image[image].summary.f1_score
current_f1_score = comp.summary.f1_score
if current_f1_score < latest_f1_score:
reasons.append(f"current F1 score is lower than the latest release F1 score: {bcolors.BOLD+bcolors.UNDERLINE}current={current_f1_score:0.2f} latest={latest_f1_score:0.2f}{bcolors.RESET} image={image}")
if comp.summary.indeterminate_percent > 10:
reasons.append(f"current indeterminate matches % is greater than 10%: {bcolors.BOLD+bcolors.UNDERLINE}current={comp.summary.indeterminate_percent:0.2f}%{bcolors.RESET} image={image}")
latest_fns = latest_release_comparisons_by_image[image].summary.false_negatives
current_fns = comp.summary.false_negatives
if current_fns > latest_fns:
reasons.append(f"current false negatives is greater than the latest release false negatives: {bcolors.BOLD+bcolors.UNDERLINE}current={current_fns} latest={latest_fns}{bcolors.RESET} image={image}")
self.reasons = reasons
def passed(self):
return len(self.reasons) == 0
def guess_tool_orientation(tools: list[str]):
if len(tools) != 2:
raise RuntimeError("expected 2 tools, got %s" % tools)
current_tool = None
latest_release_tool = None
for tool in tools:
if tool.endswith("latest"):
latest_release_tool = tool
continue
current_tool = tool
if latest_release_tool is None:
# "latest" value isn't accessible, so we do a best guess at which version is latest
current_tool, latest_release_tool = sorted(tools)
if current_tool is None:
raise ValueError("current tool not found")
return latest_release_tool, current_tool
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKCYAN = '\033[96m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
RESET = '\033[0m'
def show_results_used(results: list[artifact.ScanResult]):
print(f" Results used:")
for idx, result in enumerate(results):
branch = "├──"
if idx == len(results) - 1:
branch = "└──"
print(f" {branch} {result.ID} : {result.config.tool} against {result.config.image}")
print()
def validate(cfg: config.Application, result_set: str, images: list[str], always_run_label_comparison: bool, verbosity: int, label_entries: Optional[list[artifact.LabelEntry]] = None):
print(f"{bcolors.HEADER}{bcolors.BOLD}Validating with {result_set!r}", bcolors.RESET)
result_set_obj = store.result_set.load(name=result_set)
ret = []
for image, result_states in result_set_obj.result_state_by_image.items():
if images and image not in images:
print("Skipping image:", image)
continue
print()
print("Testing image:", image)
for state in result_states:
print(" ", f"with {state.request.tool}")
print()
gate = validate_image(cfg, [s.config.path for s in result_states], always_run_label_comparison=always_run_label_comparison, verbosity=verbosity, label_entries=label_entries)
ret.append(gate)
failure = not gate.passed()
if failure:
print(f"{bcolors.FAIL}{bcolors.BOLD}Failed quality gate{bcolors.RESET}")
for reason in gate.reasons:
print(f" - {reason}")
print()
size = 120
print(""*size)
print(""*size)
print(""*size)
return ret
def validate_image(cfg: config.Application, descriptions: list[str], always_run_label_comparison: bool, verbosity: int, label_entries: Optional[list[artifact.LabelEntry]] = None):
# do a relative comparison
# - show comparison summary (no gating action)
# - list out all individual match differences
print(f"{bcolors.HEADER}Running relative comparison...", bcolors.RESET)
relative_comparison = yardstick.compare_results(descriptions=descriptions, year_max_limit=cfg.default_max_year)
show_results_used(relative_comparison.results)
# show the relative comparison results
if verbosity > 0:
details = verbosity > 1
display.preserved_matches(relative_comparison, details=details, summary=True, common=False)
print()
# bail if there are no differences found
if not always_run_label_comparison and not sum([len(relative_comparison.unique[result.ID]) for result in relative_comparison.results]):
print("no differences found between tool results")
return Gate(None, None)
# do a label comparison
print(f"{bcolors.HEADER}Running comparison against labels...", bcolors.RESET)
results, label_entries, comparisons_by_result_id, stats_by_image_tool_pair = yardstick.compare_results_against_labels(descriptions=descriptions, year_max_limit=cfg.default_max_year, label_entries=label_entries)
show_results_used(results)
if verbosity > 0:
show_fns = verbosity > 1
display.label_comparison(
results,
comparisons_by_result_id,
stats_by_image_tool_pair,
show_fns=show_fns,
show_summaries=True,
)
latest_release_tool, current_tool = guess_tool_orientation([r.config.tool for r in results])
# show the relative comparison unique differences paired up with label conclusions (TP/FP/FN/TN/Unknown)
all_rows: list[list[Any]] = []
for result in relative_comparison.results:
label_comparison = comparisons_by_result_id[result.ID]
for unique_match in relative_comparison.unique[result.ID]:
labels = label_comparison.labels_by_match[unique_match.ID]
if not labels:
label = "(unknown)"
elif len(set(labels)) > 1:
label = ", ".join([l.name for l in labels])
else:
label = labels[0].name
color = ""
commentary = ""
if result.config.tool == latest_release_tool:
# the tool which found the unique result is the latest release tool...
if label == artifact.Label.TruePositive.name:
# drats! we missed a case (this is a new FN)
color = bcolors.FAIL
commentary = "(this is a new FN 😱)"
elif artifact.Label.FalsePositive.name in label:
# we got rid of a FP! ["hip!", "hip!"]
color = bcolors.OKBLUE
commentary = "(got rid of a former FP 🙌)"
else:
# the tool which found the unique result is the current tool...
if label == artifact.Label.TruePositive.name:
# highest of fives! we found a new TP that the previous tool release missed!
color = bcolors.OKBLUE
commentary = "(this is a new TP 🙌)"
elif artifact.Label.FalsePositive.name in label:
# welp, our changes resulted in a new FP... not great, maybe not terrible?
color = bcolors.FAIL
commentary = "(this is a new FP 😱)"
all_rows.append(
[
f"{color}{result.config.tool} ONLY{bcolors.RESET}",
f"{color}{unique_match.package.name}@{unique_match.package.version}{bcolors.RESET}",
f"{color}{unique_match.vulnerability.id}{bcolors.RESET}",
f"{color}{label}{bcolors.RESET}",
f"{commentary}",
]
)
def escape_ansi(line):
ansi_escape = re.compile(r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]')
return ansi_escape.sub('', line)
# sort but don't consider ansi escape codes
all_rows = sorted(all_rows, key=lambda x: escape_ansi(str(x[0]+x[1]+x[2]+x[3])))
if len(all_rows) == 0:
print("No differences found between tooling (with labels)")
else:
print("Match differences between tooling (with labels):")
indent = " "
print(indent + tabulate([["TOOL PARTITION", "PACKAGE", "VULNERABILITY", "LABEL", "COMMENTARY"]]+all_rows, tablefmt="plain").replace("\n", "\n" + indent) + "\n")
# populate the quality gate with data that can evaluate pass/fail conditions
return Gate(label_comparisons=comparisons_by_result_id.values(), label_comparison_stats=stats_by_image_tool_pair)
@click.command()
@click.option("--image", "-i", "images", multiple=True, help="filter down to one or more images to validate with (don't use the full result set)")
@click.option("--label-comparison", "-l", "always_run_label_comparison", is_flag=True, help="run label comparison irregardless of relative comparison results")
@click.option("--breakdown-by-ecosystem", "-e", is_flag=True, help="show label comparison results broken down by ecosystem")
@click.option("--verbose", "-v", "verbosity", count=True, help="show details of all comparisons")
@click.option("--result-set", "-r", default=default_result_set, help="the result set to use for the quality gate")
def main(images: list[str], always_run_label_comparison: bool, breakdown_by_ecosystem: bool, verbosity: int, result_set: str):
cfg = config.load()
setup_logging(verbosity)
# let's not load any more labels than we need to, base this off of the images we're validating
if not images:
images = set()
result_set_obj = store.result_set.load(name=result_set)
for state in result_set_obj.state:
images.add(state.config.image)
images = sorted(list(images))
print("Loading label entries...", end=" ")
label_entries = store.labels.load_for_image(images, year_max_limit=cfg.default_max_year)
print(f"done! {len(label_entries)} entries loaded")
result_sets = [result_set] # today only one result set is supported, but more can be added
gates = []
for result_set in result_sets:
gates.extend(validate(cfg, result_set, images=images, always_run_label_comparison=always_run_label_comparison, verbosity=verbosity, label_entries=label_entries))
print()
if breakdown_by_ecosystem:
print(f"{bcolors.HEADER}Breaking down label comparison by ecosystem performance...", bcolors.RESET)
results_by_image, label_entries, stats = yardstick.compare_results_against_labels_by_ecosystem(result_set=result_set, year_max_limit=cfg.default_max_year, label_entries=label_entries)
display.labels_by_ecosystem_comparison(
results_by_image,
stats,
show_images_used=False,
)
print()
failure = not all([gate.passed() for gate in gates])
if failure:
print("Reasons for quality gate failure:")
for gate in gates:
for reason in gate.reasons:
print(f" - {reason}")
if failure:
print()
print(f"{bcolors.FAIL}{bcolors.BOLD}Quality gate FAILED{bcolors.RESET}")
sys.exit(1)
else:
print(f"{bcolors.OKGREEN}{bcolors.BOLD}Quality gate passed!{bcolors.RESET}")
def setup_logging(verbosity: int):
# pylint: disable=redefined-outer-name, import-outside-toplevel
import logging.config
if verbosity in [0, 1, 2]:
log_level = "WARN"
elif verbosity == 3:
log_level = "INFO"
else:
log_level = "DEBUG"
logging.config.dictConfig(
{
"version": 1,
"formatters": {
"standard": {
# [%(module)s.%(funcName)s]
"format": "%(asctime)s [%(levelname)s] %(message)s",
"datefmt": "",
},
},
"handlers": {
"default": {
"level": log_level,
"formatter": "standard",
"class": "logging.StreamHandler",
"stream": "ext://sys.stderr",
},
},
"loggers": {
"": { # root logger
"handlers": ["default"],
"level": log_level,
},
},
}
)
if __name__ == '__main__':
main()


@@ -0,0 +1,3 @@
git+https://github.com/anchore/yardstick@4526ad2ff6d33d34e900ed692c3a90adc80eab73
# ../../../yardstick
tabulate==0.8.10

@@ -0,0 +1 @@
Subproject commit 3a2ecc336411ddc3f37b7d5c123b80f6848a2cf3