bat/tests/scripts/find-slow-to-highlight-files.py

126 lines
3.6 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
#
# This script goes through all languages that are supported by 'bat'. For each
# language, it loops over the corresponding file extensions and searches a
# given folder for matching files. It calls 'bat' for each of these files and
# measures the highlighting speed (number of characters per second). The script
# reports files which lead to slow highlighting speeds or errors during the
# execution of 'bat'.
#
# Requirements (external programs):
# - bat (in the $PATH)
# - fd (https://github.com/sharkdp/fd)
# - wc

import sys
import time
import os
import subprocess as sp
# Threshold speed, characters per second. Files that are highlighted more
# slowly than this are reported.
THRESHOLD_SPEED = 20000

# Maximum time (in seconds) we allow `bat` to run on a single file
BAT_TIMEOUT_SEC = 10

# Maximum number of files to measure per glob pattern
MAX_NUM_FILES = 100

# Root folder for the search: the user's home directory. Fall back to
# expanduser() when $HOME is unset (e.g. on Windows); os.getenv() alone would
# yield None there and the later `fd` invocation would fail with a TypeError.
SEARCH_ROOT = os.getenv("HOME") or os.path.expanduser("~")


def find_slow_files(startup_time, glob_pattern, language=None):
    """Report files matching a glob that `bat` highlights slowly or fails on.

    Uses `fd` to collect at most MAX_NUM_FILES regular files below SEARCH_ROOT
    whose name matches `glob_pattern`, runs `bat --color=always` on each of
    them and prints one line for every file that

    * is highlighted at fewer than THRESHOLD_SPEED characters per second,
    * makes `bat` exit with a non-zero status, or
    * makes `bat` run longer than BAT_TIMEOUT_SEC seconds.

    Files smaller than 500 bytes are skipped.

    Args:
        startup_time: Seconds a bare `bat` invocation takes; subtracted from
            every measurement so only highlighting time is counted.
        glob_pattern: Pattern handed to `fd --glob`.
        language: Optional language name, used only in the printed header.

    Raises:
        subprocess.CalledProcessError: If `fd` itself exits with an error.
    """
    out = sp.check_output(
        [
            "fd",
            "--hidden",
            "--no-ignore",
            "--type=file",
            "--max-results",
            str(MAX_NUM_FILES),
            "--glob",
            glob_pattern,
            SEARCH_ROOT,
        ]
    )
    # One path per line. Paths stay as bytes so that file names which are not
    # valid UTF-8 can still be handed to `bat` unchanged.
    paths = [line for line in out.split(b"\n") if line]

    language_text = f"Language {language}, " if language else ""
    print(f"{language_text}glob pattern: {glob_pattern} ({len(paths)} matches)")

    for path in paths:
        # Only used for messages; never fail just because a name is not UTF-8.
        display_path = path.decode(errors="replace")

        # Ask the file system for the size instead of parsing `wc -c` output,
        # whose leading padding differs between GNU and BSD implementations.
        try:
            num_chars = os.path.getsize(path)
        except OSError:
            print(f" Error: could not determine size of file '{display_path}'.")
            continue

        if num_chars < 500:
            # It is hard to measure the exact speed for short files
            continue

        start = time.perf_counter()
        try:
            sp.check_output(["bat", "--color=always", path], timeout=BAT_TIMEOUT_SEC)
        except sp.CalledProcessError:
            print(f" Error while highlighting file '{display_path}'.")
            continue
        except sp.TimeoutExpired:
            # A file this large would hit the timeout even at the threshold
            # speed, so a timeout on it is only worth a warning.
            if num_chars < THRESHOLD_SPEED * BAT_TIMEOUT_SEC:
                print(f" Error: bat timed out on file '{display_path}'.")
            else:
                print(
                    f" Warning: bat timed out on file '{display_path}' (but the file is large)."
                )
            continue

        duration = time.perf_counter() - start - startup_time
        if duration <= 0:
            # Faster than the measured startup time: no meaningful speed
            continue

        highlighting_speed = num_chars / duration
        if highlighting_speed < THRESHOLD_SPEED:
            print(f" {highlighting_speed:10.0f} chars/s: {display_path}")


def measure_bat_startup_speed():
    """Return the shortest time, in seconds, of 20 minimal `bat` runs.

    Each run pipes the bytes ``test`` into ``bat --color=always --language=py``
    and waits for the process to finish. The minimum over all runs is returned
    so that occasional slow runs do not inflate the result.

    Returns:
        float: Smallest observed duration in seconds.
    """
    command = ["bat", "--color=always", "--language=py"]
    durations = []
    for _ in range(20):
        # perf_counter() is monotonic, unlike the wall clock, so a system
        # clock adjustment cannot produce a bogus (or negative) duration.
        start = time.perf_counter()
        # Capture stdout so the highlighted output is not echoed to the terminal.
        sp.run(command, input=b"test", stdout=sp.PIPE)
        durations.append(time.perf_counter() - start)
    return min(durations)


def traverse_all_languages(startup_time):
    """Run `find_slow_files` for every entry printed by `bat --list-languages`.

    Every output line is split at ':' into a language name and a
    comma-separated list of entries. Each entry is searched for as a glob as
    is and, unless it starts with a dot, a second time as ``*.<entry>``.

    Args:
        startup_time: Startup time of `bat` in seconds, forwarded unchanged to
            `find_slow_files`.
    """
    listing = sp.check_output(["bat", "--list-languages"]).decode()
    for entry in listing.strip().split("\n"):
        language, extensions = entry.split(":")
        for name in extensions.split(","):
            # The bare entry always comes first; presumably it covers entries
            # that are complete file names rather than extensions.
            patterns = [name] if name.startswith(".") else [name, f"*.{name}"]
            for pattern in patterns:
                find_slow_files(startup_time, pattern, language)


def main():
    """Measure the startup time of `bat`, then look for slow files.

    If exactly the script name is present in `sys.argv`, all languages
    reported by `bat` are traversed. Otherwise the first command-line argument
    is used as the single glob pattern to check.
    """
    print("Measuring 'bat' startup speed ... ", flush=True, end="")
    startup_time = measure_bat_startup_speed()
    print(f"{startup_time * 1000:.1f} ms")

    if len(sys.argv) != 1:
        # A pattern was given on the command line: check only that one.
        find_slow_files(startup_time, sys.argv[1])
        return

    traverse_all_languages(startup_time)


if __name__ == "__main__":
    main()