mirror of
https://github.com/AsahiLinux/u-boot
synced 2025-03-16 23:07:00 +00:00
Merge branch '2023-02-10-update-trace-feature-to-work-with-trace-cmd'
To quote the author: Since U-Boot's tracing feature was originally written, quite a few changes have taken place in this domain. The original text format used by tracing is still emitted by Linux, but a new trace-cmd tool has invented a binary format which is now used by new tools, such as kernelshark. With recent distributions and the move to Python 3, the old pybootchart tool does not build or run. Unfortunately there is no 1:1 replacement for the features that were provided by pybootchart, or at least it is not obvious. Still, it makes sense to keep with the times. This series updates proftool to use the new binary format, adding support for function and funcgraph tracing, so that U-Boot's trace records can be examined by trace-cmd and kernelshark. This series also adds support for a flamegraph, which provides a visual way to see which functions are called a lot, as well as which ones consume the most time. Some minor updates to the trace implementation within U-Boot are included, to provide a little more information and to fix a few problems. No unit tests are provided by proftool, but a functional test ensures that sandbox can emit traces which can be processed by proftool, then parsed by trace-cmd and that the timing of the various formats looks consistent.
This commit is contained in:
commit
78d1c3949a
26 changed files with 2349 additions and 231 deletions
|
@ -2,7 +2,7 @@ variables:
|
|||
windows_vm: windows-2019
|
||||
ubuntu_vm: ubuntu-22.04
|
||||
macos_vm: macOS-12
|
||||
ci_runner_image: trini/u-boot-gitlab-ci-runner:jammy-20221130-11Jan2023
|
||||
ci_runner_image: trini/u-boot-gitlab-ci-runner:jammy-20230126-10Feb2023
|
||||
# Add '-u 0' options for Azure pipelines, otherwise we get "permission
|
||||
# denied" error when it tries to "useradd -m -u 1001 vsts_azpcontainer",
|
||||
# since our $(ci_runner_image) user is not root.
|
||||
|
@ -240,6 +240,11 @@ stages:
|
|||
TEST_PY_TEST_SPEC: "test_ofplatdata or test_handoff or test_spl"
|
||||
sandbox_flattree:
|
||||
TEST_PY_BD: "sandbox_flattree"
|
||||
sandbox_trace:
|
||||
TEST_PY_BD: "sandbox"
|
||||
BUILD_ENV: "FTRACE=1 NO_LTO=1"
|
||||
TEST_PY_TEST_SPEC: "trace"
|
||||
OVERRIDE: "-a CONFIG_TRACE=y -a CONFIG_TRACE_EARLY=y -a CONFIG_TRACE_EARLY_SIZE=0x01000000"
|
||||
coreboot:
|
||||
TEST_PY_BD: "coreboot"
|
||||
TEST_PY_ID: "--id qemu"
|
||||
|
@ -362,6 +367,9 @@ stages:
|
|||
# the below corresponds to .gitlab-ci.yml "script"
|
||||
cd ${WORK_DIR}
|
||||
export UBOOT_TRAVIS_BUILD_DIR=/tmp/${TEST_PY_BD};
|
||||
if [ -n "${BUILD_ENV}" ]; then
|
||||
export ${BUILD_ENV};
|
||||
fi
|
||||
tools/buildman/buildman -o ${UBOOT_TRAVIS_BUILD_DIR} -w -E -W -e --board ${TEST_PY_BD} ${OVERRIDE}
|
||||
cp ~/grub_x86.efi ${UBOOT_TRAVIS_BUILD_DIR}/
|
||||
cp ~/grub_x64.efi ${UBOOT_TRAVIS_BUILD_DIR}/
|
||||
|
|
1
.gitattributes
vendored
1
.gitattributes
vendored
|
@ -4,3 +4,4 @@
|
|||
*.bmp binary
|
||||
*.ttf binary
|
||||
*.gz binary
|
||||
*.png binary
|
||||
|
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -105,3 +105,6 @@ __pycache__
|
|||
# pylint files
|
||||
/pylint.cur
|
||||
/pylint.out/
|
||||
|
||||
# moveconfig database
|
||||
/moveconfig.db
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
# Grab our configured image. The source for this is found
|
||||
# in the u-boot tree at tools/docker/Dockerfile
|
||||
image: trini/u-boot-gitlab-ci-runner:jammy-20221130-11Jan2023
|
||||
image: trini/u-boot-gitlab-ci-runner:jammy-20230126-10Feb2023
|
||||
|
||||
# We run some tests in different order, to catch some failures quicker.
|
||||
stages:
|
||||
|
@ -35,6 +35,9 @@ stages:
|
|||
# If we've been asked to use clang only do one configuration.
|
||||
- export UBOOT_TRAVIS_BUILD_DIR=/tmp/${TEST_PY_BD}
|
||||
- echo BUILD_ENV ${BUILD_ENV}
|
||||
- if [ -n "${BUILD_ENV}" ]; then
|
||||
export ${BUILD_ENV};
|
||||
fi
|
||||
- tools/buildman/buildman -o ${UBOOT_TRAVIS_BUILD_DIR} -w -E -W -e
|
||||
--board ${TEST_PY_BD} ${OVERRIDE}
|
||||
- cp ~/grub_x86.efi $UBOOT_TRAVIS_BUILD_DIR/
|
||||
|
@ -268,6 +271,15 @@ sandbox_vpl test.py:
|
|||
TEST_PY_TEST_SPEC: "test_vpl_help or test_spl"
|
||||
<<: *buildman_and_testpy_dfn
|
||||
|
||||
# Enable tracing and disable LTO, to ensure functions are not elided
|
||||
sandbox trace_test.py:
|
||||
variables:
|
||||
TEST_PY_BD: "sandbox"
|
||||
BUILD_ENV: "FTRACE=1 NO_LTO=1"
|
||||
TEST_PY_TEST_SPEC: "trace"
|
||||
OVERRIDE: "-a CONFIG_TRACE=y -a CONFIG_TRACE_EARLY=y -a CONFIG_TRACE_EARLY_SIZE=0x01000000"
|
||||
<<: *buildman_and_testpy_dfn
|
||||
|
||||
evb-ast2500 test.py:
|
||||
variables:
|
||||
TEST_PY_BD: "evb-ast2500"
|
||||
|
|
|
@ -290,7 +290,10 @@ static int setup_mon_len(void)
|
|||
{
|
||||
#if defined(__ARM__) || defined(__MICROBLAZE__)
|
||||
gd->mon_len = (ulong)&__bss_end - (ulong)_start;
|
||||
#elif defined(CONFIG_SANDBOX) && !defined(__riscv)
|
||||
gd->mon_len = (ulong)&_end - (ulong)_init;
|
||||
#elif defined(CONFIG_SANDBOX)
|
||||
/* gcc does not provide _init in crti.o on RISC-V */
|
||||
gd->mon_len = 0;
|
||||
#elif defined(CONFIG_EFI_APP)
|
||||
gd->mon_len = (ulong)&_end - (ulong)_init;
|
||||
|
|
|
@ -796,6 +796,15 @@ static init_fnc_t init_sequence_r[] = {
|
|||
|
||||
void board_init_r(gd_t *new_gd, ulong dest_addr)
|
||||
{
|
||||
/*
|
||||
* The pre-relocation drivers may be using memory that has now gone
|
||||
* away. Mark serial as unavailable - this will fall back to the debug
|
||||
* UART if available.
|
||||
*
|
||||
* Do the same with log drivers since the memory may not be available.
|
||||
*/
|
||||
gd->flags &= ~(GD_FLG_SERIAL_READY | GD_FLG_LOG_READY);
|
||||
|
||||
/*
|
||||
* Set up the new global data pointer. So far only x86 does this
|
||||
* here.
|
||||
|
|
BIN
doc/develop/pics/flamegraph.png
Normal file
BIN
doc/develop/pics/flamegraph.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 47 KiB |
BIN
doc/develop/pics/flamegraph_timing.png
Normal file
BIN
doc/develop/pics/flamegraph_timing.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 31 KiB |
BIN
doc/develop/pics/flamegraph_zoom.png
Normal file
BIN
doc/develop/pics/flamegraph_zoom.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 25 KiB |
BIN
doc/develop/pics/kernelshark.png
Normal file
BIN
doc/develop/pics/kernelshark.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 29 KiB |
BIN
doc/develop/pics/kernelshark_fg.png
Normal file
BIN
doc/develop/pics/kernelshark_fg.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 28 KiB |
|
@ -15,12 +15,16 @@ Overview
|
|||
The trace feature uses GCC's instrument-functions feature to trace all
|
||||
function entry/exit points. These are then recorded in a memory buffer.
|
||||
The memory buffer can be saved to the host over a network link using
|
||||
tftpput or by writing to an attached memory device such as MMC.
|
||||
tftpput or by writing to an attached storage device such as MMC.
|
||||
|
||||
On the host, the file is first converted with a tool called 'proftool',
|
||||
which extracts useful information from it. The resulting trace output
|
||||
resembles that emitted by Linux's ftrace feature, so can be visually
|
||||
displayed by pytimechart.
|
||||
displayed by kernelshark (see kernelshark_) and used with
|
||||
'trace-cmd report' (see trace_cmd_).
|
||||
|
||||
It is also possible to produce a flame graph for use with flamegraph.pl
|
||||
(see flamegraph_pl_).
|
||||
|
||||
|
||||
Quick-start using Sandbox
|
||||
|
@ -30,7 +34,7 @@ Sandbox is a build of U-Boot that can run under Linux so it is a convenient
|
|||
way of trying out tracing before you use it on your actual board. To do
|
||||
this, follow these steps:
|
||||
|
||||
Add the following to config/sandbox_defconfig
|
||||
Add the following to `config/sandbox_defconfig`:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
|
@ -75,7 +79,7 @@ a trace:
|
|||
16 maximum observed call depth
|
||||
15 call depth limit
|
||||
1,275,767 calls not traced due to depth
|
||||
=>trace calls 0 e00000
|
||||
=>trace calls 1000000 e00000
|
||||
Call list dumped to 00000000, size 0xae0a40
|
||||
=>print
|
||||
baudrate=115200
|
||||
|
@ -87,7 +91,7 @@ a trace:
|
|||
stdout=serial
|
||||
|
||||
Environment size: 117/8188 bytes
|
||||
=>host save host 0 trace 0 ${profoffset}
|
||||
=>host save hostfs - 1000000 trace ${profoffset}
|
||||
11405888 bytes written in 10 ms (1.1 GiB/s)
|
||||
=>reset
|
||||
|
||||
|
@ -96,18 +100,107 @@ Then run proftool to convert the trace information to ftrace format
|
|||
|
||||
.. code-block:: console
|
||||
|
||||
$ ./sandbox/tools/proftool -m sandbox/System.map -p trace dump-ftrace >trace.txt
|
||||
$ ./sandbox/tools/proftool -m sandbox/System.map -t trace dump-ftrace >trace.dat
|
||||
|
||||
Finally run pytimechart to display it
|
||||
Finally run kernelshark to display it (note it only works with `.dat` files!):
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ pytimechart trace.txt
|
||||
$ kernelshark trace.dat
|
||||
|
||||
Using this tool you can zoom and pan across the trace, with the function
|
||||
calls on the left and little marks representing the start and end of each
|
||||
Using this tool you can view the trace records and see the timestamp for each
|
||||
function.
|
||||
|
||||
.. image:: pics/kernelshark.png
|
||||
:width: 800
|
||||
:alt: Kernelshark showing function-trace records
|
||||
|
||||
|
||||
To see the records on the console, use trace-cmd:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ trace-cmd report trace.dat | less
|
||||
cpus=1
|
||||
u-boot-1 [000] 3.116364: function: initf_malloc
|
||||
u-boot-1 [000] 3.116375: function: initf_malloc
|
||||
u-boot-1 [000] 3.116386: function: initf_bootstage
|
||||
u-boot-1 [000] 3.116396: function: bootstage_init
|
||||
u-boot-1 [000] 3.116408: function: malloc
|
||||
u-boot-1 [000] 3.116418: function: malloc_simple
|
||||
u-boot-1 [000] 3.116429: function: alloc_simple
|
||||
u-boot-1 [000] 3.116441: function: alloc_simple
|
||||
u-boot-1 [000] 3.116449: function: malloc_simple
|
||||
u-boot-1 [000] 3.116457: function: malloc
|
||||
|
||||
Note that `pytimechart` is obsolete so cannot be used anymore.
|
||||
|
||||
There is a -f option available to select a function graph:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ ./sandbox/tools/proftool -m sandbox/System.map -t trace -f funcgraph dump-ftrace >trace.dat
|
||||
|
||||
Again, you can use kernelshark or trace-cmd to look at the output. In this case
|
||||
you will see the time taken by each function shown against its exit record.
|
||||
|
||||
.. image:: pics/kernelshark_fg.png
|
||||
:width: 800
|
||||
:alt: Kernelshark showing function-graph records
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ trace-cmd report trace.dat | less
|
||||
cpus=1
|
||||
u-boot-1 [000] 3.116364: funcgraph_entry: 0.011 us | initf_malloc();
|
||||
u-boot-1 [000] 3.116386: funcgraph_entry: | initf_bootstage() {
|
||||
u-boot-1 [000] 3.116396: funcgraph_entry: | bootstage_init() {
|
||||
u-boot-1 [000] 3.116408: funcgraph_entry: | malloc() {
|
||||
u-boot-1 [000] 3.116418: funcgraph_entry: | malloc_simple() {
|
||||
u-boot-1 [000] 3.116429: funcgraph_entry: 0.012 us | alloc_simple();
|
||||
u-boot-1 [000] 3.116449: funcgraph_exit: 0.031 us | }
|
||||
u-boot-1 [000] 3.116457: funcgraph_exit: 0.049 us | }
|
||||
u-boot-1 [000] 3.116466: funcgraph_entry: 0.063 us | memset();
|
||||
u-boot-1 [000] 3.116539: funcgraph_exit: 0.143 us | }
|
||||
|
||||
Flame graph
|
||||
-----------
|
||||
|
||||
Some simple flame graph options are available as well, using the dump-flamegraph
|
||||
command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ ./sandbox/tools/proftool -m sandbox/System.map -t trace dump-flamegraph >trace.fg
|
||||
$ flamegraph.pl trace.fg >trace.svg
|
||||
|
||||
You can load the .svg file into a viewer. If you use Chrome (and some other
|
||||
programs) you can click around and zoom in and out.
|
||||
|
||||
.. image:: pics/flamegraph.png
|
||||
:width: 800
|
||||
:alt: Chrome showing the flamegraph.pl output
|
||||
|
||||
.. image:: pics/flamegraph_zoom.png
|
||||
:width: 800
|
||||
:alt: Chrome showing zooming into the flamegraph.pl output
|
||||
|
||||
|
||||
A timing variant is also available, which gives an idea of how much time is
|
||||
spend in each call stack:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ ./sandbox/tools/proftool -m sandbox/System.map -t trace dump-flamegraph -f timing >trace.fg
|
||||
$ flamegraph.pl trace.fg >trace.svg
|
||||
|
||||
Note that trace collection does slow down execution so the timings will be
|
||||
inflated. They should be used to guide optimisation. For accurate boot timings,
|
||||
use bootstage.
|
||||
|
||||
.. image:: pics/flamegraph_timing.png
|
||||
:width: 800
|
||||
:alt: Chrome showing flamegraph.pl output with timing
|
||||
|
||||
CONFIG Options
|
||||
--------------
|
||||
|
@ -138,6 +231,11 @@ CONFIG_TRACE_EARLY_SIZE
|
|||
CONFIG_TRACE_EARLY_ADDR
|
||||
Address of early trace buffer
|
||||
|
||||
CONFIG_TRACE_CALL_DEPTH_LIMIT
|
||||
Sets the limit on trace call-depth. For a broad view, 10 is typically
|
||||
sufficient. Setting this too large creates enormous traces and distorts
|
||||
the overall timing considerable.
|
||||
|
||||
|
||||
Building U-Boot with Tracing Enabled
|
||||
------------------------------------
|
||||
|
@ -148,6 +246,26 @@ instrumenting from the command line instead of having to change board
|
|||
config files.
|
||||
|
||||
|
||||
Board requirements
|
||||
------------------
|
||||
|
||||
Trace data collection relies on a microsecond timer, accessed through
|
||||
`timer_get_us()`. So the first thing you should do is make sure that
|
||||
this produces sensible results for your board. Suitable sources for
|
||||
this timer include high resolution timers, PWMs or profile timers if
|
||||
available. Most modern SOCs have a suitable timer for this.
|
||||
|
||||
See `add_ftrace()` for where `timer_get_us()` is called. The `notrace`
|
||||
attribute must be used on each function called by `timer_get_us()` since
|
||||
recursive calls to `add_ftrace()` will cause a fault::
|
||||
|
||||
trace: recursion detected, disabling
|
||||
|
||||
You cannot use driver model to obtain the microsecond timer, since tracing
|
||||
may be enabled before driver model is set up. Instead, provide a low-level
|
||||
function which accesses the timer, setting it up if needed.
|
||||
|
||||
|
||||
Collecting Trace Data
|
||||
---------------------
|
||||
|
||||
|
@ -155,21 +273,22 @@ When you run U-Boot on your board it will collect trace data up to the
|
|||
limit of the trace buffer size you have specified. Once that is exhausted
|
||||
no more data will be collected.
|
||||
|
||||
Collecting trace data has an affect on execution time/performance. You
|
||||
Collecting trace data affects execution time and performance. You
|
||||
will notice this particularly with trivial functions - the overhead of
|
||||
recording their execution may even exceed their normal execution time.
|
||||
In practice this doesn't matter much so long as you are aware of the
|
||||
effect. Once you have done your optimizations, turn off tracing before
|
||||
doing end-to-end timing.
|
||||
doing end-to-end timing using bootstage.
|
||||
|
||||
The best time to start tracing is right at the beginning of U-Boot. The
|
||||
best time to stop tracing is right at the end. In practice it is hard
|
||||
to achieve these ideals.
|
||||
|
||||
This implementation enables tracing early in board_init_f(). This means
|
||||
This implementation enables tracing early in `board_init_r()`, or
|
||||
`board_init_f()` when `TRACE_EARLY` is enabled. This means
|
||||
that it captures most of the board init process, missing only the
|
||||
early architecture-specific init. However, it also misses the entire
|
||||
SPL stage if there is one.
|
||||
SPL stage if there is one. At present tracing is not supported in SPL.
|
||||
|
||||
U-Boot typically ends with a 'bootm' command which loads and runs an
|
||||
OS. There is useful trace data in the execution of that bootm
|
||||
|
@ -179,39 +298,11 @@ the OS. In practical terms, U-Boot runs the 'fakegocmd' environment
|
|||
variable at this point. This variable should have a short script which
|
||||
collects the trace data and writes it somewhere.
|
||||
|
||||
Trace data collection relies on a microsecond timer, accessed through
|
||||
timer_get_us(). So the first think you should do is make sure that
|
||||
this produces sensible results for your board. Suitable sources for
|
||||
this timer include high resolution timers, PWMs or profile timers if
|
||||
available. Most modern SOCs have a suitable timer for this. Make sure
|
||||
that you mark this timer (and anything it calls) with
|
||||
notrace so that the trace library can
|
||||
use it without causing an infinite loop.
|
||||
Controlling the trace
|
||||
---------------------
|
||||
|
||||
|
||||
Commands
|
||||
--------
|
||||
|
||||
The trace command has variable sub-commands:
|
||||
|
||||
stats
|
||||
Display tracing statistics
|
||||
|
||||
pause
|
||||
Pause tracing
|
||||
|
||||
resume
|
||||
Resume tracing
|
||||
|
||||
funclist [<addr> <size>]
|
||||
Dump a list of functions into the buffer
|
||||
|
||||
calls [<addr> <size>]
|
||||
Dump function call trace into buffer
|
||||
|
||||
If the address and size are not given, these are obtained from environment
|
||||
variables (see below). In any case the environment variables are updated
|
||||
after the command runs.
|
||||
U-Boot provides a command-line interface to the trace system for controlling
|
||||
tracing and accessing the trace data. See :doc:`../usage/cmd/trace`.
|
||||
|
||||
|
||||
Environment Variables
|
||||
|
@ -264,39 +355,94 @@ a trace log to address 10000000 and sends it to a host machine using
|
|||
TFTP. After this, U-Boot will boot the OS normally, albeit a little
|
||||
later.
|
||||
|
||||
For a filesystem you may do something like::
|
||||
|
||||
Converting Trace Output Data
|
||||
----------------------------
|
||||
trace calls 10000000 1000000;
|
||||
save mmc 1:1 10000000 /trace ${profoffset}
|
||||
|
||||
The trace buffer format is internal to the trace system. It consists of a
|
||||
header, a call count for each function site, followed by a list of trace
|
||||
records, once for each function call.
|
||||
|
||||
|
||||
Converting Trace Output Data (proftool)
|
||||
---------------------------------------
|
||||
|
||||
The trace output data is kept in a binary format which is not documented
|
||||
here. To convert it into something useful, you can use proftool.
|
||||
here. See the `trace.h` header file if you are interested. To convert it into
|
||||
something useful, you can use proftool.
|
||||
|
||||
This tool must be given the U-Boot map file and the trace data received
|
||||
from running that U-Boot. It produces a text output file.
|
||||
from running that U-Boot. It produces a binary output file.
|
||||
|
||||
Options
|
||||
It is also possible to provide a configuration file to indicate which functions
|
||||
should be included or dropped during conversion. This file consists of lines
|
||||
like::
|
||||
|
||||
include-func <regex>
|
||||
exclude-func <regex>
|
||||
|
||||
where <regex> is a regular expression matched against function names. It
|
||||
allows some functions to be dropped from the trace when producing ftrace
|
||||
records.
|
||||
|
||||
Options:
|
||||
|
||||
-c <config_file>
|
||||
Specify the optional configuration file, to control which functions are
|
||||
included in the output.
|
||||
|
||||
-f <format>
|
||||
Specifies the format to use (see below)
|
||||
|
||||
-m <map_file>
|
||||
Specify U-Boot map file
|
||||
Specify U-Boot map file (`System.map`)
|
||||
|
||||
-p <trace_file>
|
||||
Specify profile/trace file
|
||||
-o <output file>
|
||||
Specify the output filename
|
||||
|
||||
-t <trace_file>
|
||||
Specify trace file, the data saved from U-Boot
|
||||
|
||||
-v <0-4>
|
||||
Specify the verbosity, where 0 is the minimum and 4 is for debugging.
|
||||
|
||||
Commands:
|
||||
|
||||
dump-ftrace
|
||||
Write a text dump of the file in Linux ftrace format to stdout
|
||||
dump-ftrace:
|
||||
Write a binary dump of the file in Linux ftrace format. Two options are
|
||||
available:
|
||||
|
||||
function
|
||||
write function-call records (caller/callee)
|
||||
|
||||
funcgraph
|
||||
write function entry/exit records (graph)
|
||||
|
||||
This format can be used with kernelshark_ and trace_cmd_.
|
||||
|
||||
dump-flamegraph
|
||||
Write a list of stack records useful for producing a flame graph. Two
|
||||
options are available:
|
||||
|
||||
calls
|
||||
create a flamegraph of stack frames
|
||||
|
||||
timing
|
||||
create a flamegraph of microseconds for each stack frame
|
||||
|
||||
This format can be used with flamegraph_pl_.
|
||||
|
||||
Viewing the Trace Data
|
||||
----------------------
|
||||
|
||||
You can use pytimechart for this (sudo apt-get pytimechart might work on
|
||||
your Debian-style machine, and use your favourite search engine to obtain
|
||||
documentation). It expects the file to have a .txt extension. The program
|
||||
has terse user interface but is very convenient for viewing U-Boot
|
||||
profile information.
|
||||
You can use kernelshark_ for a GUI, but note that version 2.0.x was broken. If
|
||||
you have that version you could try building it from source.
|
||||
|
||||
The file must have a .dat extension or it is ignored. The program has terse
|
||||
user interface but is very convenient for viewing U-Boot profile information.
|
||||
|
||||
Also available is trace_cmd_ which provides a command-line interface.
|
||||
|
||||
Workflow Suggestions
|
||||
--------------------
|
||||
|
@ -329,7 +475,9 @@ There are a few parameters in the code that you may want to consider.
|
|||
There is a function call depth limit (set to 15 by default). When the
|
||||
stack depth goes above this then no tracing information is recorded.
|
||||
The maximum depth reached is recorded and displayed by the 'trace stats'
|
||||
command.
|
||||
command. While it might be tempting to set the depth limit quite high, this
|
||||
can dramatically increase the size of the trace output as well as the execution
|
||||
time.
|
||||
|
||||
|
||||
Future Work
|
||||
|
@ -346,5 +494,10 @@ Some other features that might be useful:
|
|||
- Compression of trace information
|
||||
|
||||
|
||||
Simon Glass <sjg@chromium.org>
|
||||
April 2013
|
||||
.. sectionauthor:: Simon Glass <sjg@chromium.org>
|
||||
.. April 2013
|
||||
.. Updated January 2023
|
||||
|
||||
.. _kernelshark: https://kernelshark.org/
|
||||
.. _trace_cmd: https://www.trace-cmd.org/
|
||||
.. _flamegraph_pl: https://github.com/brendangregg/FlameGraph/blob/master/flamegraph.pl
|
||||
|
|
163
doc/usage/cmd/trace.rst
Normal file
163
doc/usage/cmd/trace.rst
Normal file
|
@ -0,0 +1,163 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0+:
|
||||
|
||||
trace command
|
||||
=============
|
||||
|
||||
Synopis
|
||||
-------
|
||||
|
||||
::
|
||||
|
||||
trace stats
|
||||
trace pause
|
||||
trace resume
|
||||
trace funclist [<addr> <size>]
|
||||
trace calls [<addr> <size>]
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The *trace* command is used to control the U-Boot tracing system. It allows
|
||||
tracing to be paused and resumed, shows statistics for traces and provides a
|
||||
way to dump out the trace information.
|
||||
|
||||
|
||||
trace stats
|
||||
~~~~~~~~~~~
|
||||
|
||||
This display tracing statistics, as follows:
|
||||
|
||||
function sites
|
||||
Functions are binned as a way of reducing the amount of space needed to
|
||||
hold all the function information. This is controlled by FUNC_SITE_SIZE in
|
||||
the trace.h header file. The usual value is 4, which provides the finest
|
||||
granularity (assuming a minimum instruction size of 4 bytes) which means
|
||||
that every function can be resolved individually.
|
||||
|
||||
function calls
|
||||
Total number of function calls, including those which were not traced due
|
||||
to buffer space. This count does not include functions which exceeded the
|
||||
depth limit.
|
||||
|
||||
untracked function calls
|
||||
Total number of function calls which did not appear in the U-Boot image.
|
||||
This can happen if a function is called outside the normal code area.
|
||||
|
||||
traced function calls
|
||||
Total number of function calls which were actually traced, i.e. are included
|
||||
in the recorded trace data.
|
||||
|
||||
dropped due to overflow
|
||||
If the trace buffer was exhausted then this shows the number of records that
|
||||
were dropped. Try reducing the depth limit or expanding the buffer size.
|
||||
|
||||
maximum observed call depth
|
||||
Maximum observed call depth while tracing.
|
||||
|
||||
calls not traced due to depth
|
||||
Counts the number of function calls that were not recorded because they
|
||||
exceeded the maximum call depth.
|
||||
|
||||
max function calls
|
||||
Maximum number of function calls which can be recorded in the trace buffer,
|
||||
given its size. Once `function calls` hits this value, recording stops.
|
||||
|
||||
trace buffer
|
||||
Address of trace buffer
|
||||
|
||||
call records
|
||||
Address of first trace record. This is near the start of the trace buffer,
|
||||
after the function-call counts.
|
||||
|
||||
|
||||
trace pause
|
||||
~~~~~~~~~~~
|
||||
|
||||
Pauses tracing, so that no more data is added to the trace buffer.
|
||||
|
||||
|
||||
trace resume
|
||||
~~~~~~~~~~~~
|
||||
|
||||
Resumes tracing, so that new function calls are added to the trace buffer if
|
||||
there is sufficient space.
|
||||
|
||||
|
||||
trace funclist [<addr> <size>]
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Dumps a list of functions into the provided buffer. The file uses a format
|
||||
specific to U-Boot: a header, following by the function offset and call count.
|
||||
|
||||
If the address and size are not given, these are obtained from
|
||||
:ref:`develop/trace:environment variables`. In any case the environment
|
||||
variables are updated after the command runs.
|
||||
|
||||
The resulting data should be written out to the host, e.g. using Ethernet or
|
||||
a filesystem. There are no tools provided to read this sdata.
|
||||
|
||||
|
||||
trace calls [<addr> <size>]
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Dumps a list of function calls into the provided buffer. The file uses a format
|
||||
specific to U-Boot: a header, following by the list of calls. The proftool
|
||||
tool can be used to convert this information ready for further analysis.
|
||||
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
::
|
||||
|
||||
=> trace stats
|
||||
269,252 function sites
|
||||
38,025,059 function calls
|
||||
3 untracked function calls
|
||||
7,382,690 traced function calls
|
||||
17 maximum observed call depth
|
||||
15 call depth limit
|
||||
68,667,432 calls not traced due to depth
|
||||
22,190,112 max function calls
|
||||
|
||||
trace buffer 6c000000 call records 6c20de78
|
||||
=> trace resume
|
||||
=> trace pause
|
||||
|
||||
This shows that resuming the trace causes the buffer to overflow::
|
||||
|
||||
=> trace stats
|
||||
269,252 function sites
|
||||
49,573,694 function calls
|
||||
3 untracked function calls
|
||||
22,190,112 traced function calls (8289848 dropped due to overflow)
|
||||
17 maximum observed call depth
|
||||
15 call depth limit
|
||||
68,667,432 calls not traced due to depth
|
||||
22,190,112 max function calls
|
||||
|
||||
trace buffer 6c000000 call records 6c20de78
|
||||
=> trace funcs 30000000 0x100000
|
||||
Function trace dumped to 30000000, size 0x1e70
|
||||
|
||||
This shows collecting and writing out the result trace data:
|
||||
|
||||
::
|
||||
=> trace calls 20000000 0x10000000
|
||||
Call list dumped to 20000000, size 0xfdf21a0
|
||||
=> save mmc 1:1 20000000 /trace ${profoffset}
|
||||
File System is consistent
|
||||
file found, deleting
|
||||
update journal finished
|
||||
File System is consistent
|
||||
update journal finished
|
||||
266281376 bytes written in 18584 ms (13.7 MiB/s)
|
||||
|
||||
From here you can use proftool to convert it:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tools/proftool -m System.map -t trace -o asc.fg dump-ftrace
|
||||
|
||||
|
||||
.. _`ACPI specification`: https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf
|
|
@ -84,6 +84,7 @@ Shell commands
|
|||
cmd/sm
|
||||
cmd/temperature
|
||||
cmd/tftpput
|
||||
cmd/trace
|
||||
cmd/true
|
||||
cmd/ums
|
||||
cmd/ut
|
||||
|
|
|
@ -654,7 +654,8 @@ void *dev_get_priv(const struct udevice *dev)
|
|||
return dm_priv_to_rw(dev->priv_);
|
||||
}
|
||||
|
||||
void *dev_get_uclass_priv(const struct udevice *dev)
|
||||
/* notrace is needed as this is called by timer_get_rate() */
|
||||
notrace void *dev_get_uclass_priv(const struct udevice *dev)
|
||||
{
|
||||
if (!dev) {
|
||||
dm_warn("%s: null device\n", __func__);
|
||||
|
|
|
@ -136,7 +136,7 @@ u64 timer_conv_64(u32 count)
|
|||
return ((u64)gd->timebase_h << 32) | gd->timebase_l;
|
||||
}
|
||||
|
||||
int notrace dm_timer_init(void)
|
||||
int dm_timer_init(void)
|
||||
{
|
||||
struct udevice *dev = NULL;
|
||||
__maybe_unused ofnode node;
|
||||
|
|
|
@ -9,11 +9,16 @@
|
|||
#define timer_get_ops(dev) ((struct timer_ops *)(dev)->driver->ops)
|
||||
|
||||
/**
|
||||
* dm_timer_init() - initialize a timer for time keeping. On success
|
||||
* initializes gd->timer so that lib/timer can use it for future
|
||||
* referrence.
|
||||
* dm_timer_init() - set up a timer for time keeping
|
||||
*
|
||||
* Return: 0 on success or error number
|
||||
* Sets up gd->timer if the device is not already bound, making sure it is
|
||||
* probed and ready for use
|
||||
*
|
||||
* On success, inits gd->timer so that lib/timer can use it for future reference
|
||||
*
|
||||
* Returns: 0 on success, -EAGAIN if driver model is not ready yet, -ENODEV if
|
||||
* no timer could be found, other error if the timer could not be bound or
|
||||
* probed
|
||||
*/
|
||||
int dm_timer_init(void);
|
||||
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#ifndef __TRACE_H
|
||||
#define __TRACE_H
|
||||
|
||||
/* this file is included from a tool so uses uint32_t instead of u32, etc. */
|
||||
|
||||
enum {
|
||||
/*
|
||||
* This affects the granularity of our trace. We can bin function
|
||||
|
@ -15,14 +17,18 @@ enum {
|
|||
*
|
||||
* The value here assumes a minimum instruction size of 4 bytes,
|
||||
* or that instructions are 2 bytes but there are at least 2 of
|
||||
* them in every function.
|
||||
* them in every function. Given that each function needs a call to
|
||||
* __cyg_profile_func_enter() and __cyg_profile_func_exit() as well,
|
||||
* we cannot have functions smaller that 16 bytes.
|
||||
*
|
||||
* Increasing this value reduces the number of functions we can
|
||||
* resolve, but reduces the size of the uintptr_t array used for
|
||||
* our function list, which is the length of the code divided by
|
||||
* this value.
|
||||
*/
|
||||
FUNC_SITE_SIZE = 4, /* distance between function sites */
|
||||
FUNC_SITE_SIZE = 16, /* distance between function sites */
|
||||
|
||||
TRACE_VERSION = 1,
|
||||
};
|
||||
|
||||
enum trace_chunk_type {
|
||||
|
@ -39,7 +45,11 @@ struct trace_output_func {
|
|||
/* A header at the start of the trace output buffer */
|
||||
struct trace_output_hdr {
|
||||
enum trace_chunk_type type; /* Record type */
|
||||
size_t rec_count; /* Number of records */
|
||||
uint32_t version; /* Version (TRACE_VERSION) */
|
||||
uint32_t rec_count; /* Number of records */
|
||||
uint32_t spare; /* 0 */
|
||||
uint64_t text_base; /* Value of CONFIG_TEXT_BASE */
|
||||
uint64_t spare2; /* 0 */
|
||||
};
|
||||
|
||||
/* Print statistics about traced function calls */
|
||||
|
@ -63,7 +73,7 @@ int trace_list_functions(void *buff, size_t buff_size, size_t *needed);
|
|||
enum ftrace_flags {
|
||||
FUNCF_EXIT = 0UL << 30,
|
||||
FUNCF_ENTRY = 1UL << 30,
|
||||
FUNCF_TEXTBASE = 2UL << 30,
|
||||
/* two more values are available */
|
||||
|
||||
FUNCF_TIMESTAMP_MASK = 0x3fffffff,
|
||||
};
|
||||
|
|
|
@ -368,7 +368,7 @@ config TRACE_EARLY_SIZE
|
|||
config TRACE_EARLY_CALL_DEPTH_LIMIT
|
||||
int "Early trace call depth limit"
|
||||
depends on TRACE_EARLY
|
||||
default 200
|
||||
default 15
|
||||
help
|
||||
Sets the maximum call depth up to which function calls are recorded
|
||||
during early tracing.
|
||||
|
|
21
lib/abuf.c
21
lib/abuf.c
|
@ -6,11 +6,14 @@
|
|||
* Written by Simon Glass <sjg@chromium.org>
|
||||
*/
|
||||
|
||||
#ifndef USE_HOSTCC
|
||||
#include <common.h>
|
||||
#include <abuf.h>
|
||||
#include <malloc.h>
|
||||
#include <mapmem.h>
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include <abuf.h>
|
||||
|
||||
void abuf_set(struct abuf *abuf, void *data, size_t size)
|
||||
{
|
||||
|
@ -19,10 +22,26 @@ void abuf_set(struct abuf *abuf, void *data, size_t size)
|
|||
abuf->size = size;
|
||||
}
|
||||
|
||||
#ifndef USE_HOSTCC
|
||||
void abuf_map_sysmem(struct abuf *abuf, ulong addr, size_t size)
|
||||
{
|
||||
abuf_set(abuf, map_sysmem(addr, size), size);
|
||||
}
|
||||
#else
|
||||
/* copied from lib/string.c for convenience */
|
||||
static char *memdup(const void *src, size_t len)
|
||||
{
|
||||
char *p;
|
||||
|
||||
p = malloc(len);
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
memcpy(p, src, len);
|
||||
|
||||
return p;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool abuf_realloc(struct abuf *abuf, size_t new_size)
|
||||
{
|
||||
|
|
14
lib/time.c
14
lib/time.c
|
@ -70,15 +70,14 @@ extern unsigned long timer_read_counter(void);
|
|||
ulong notrace get_tbclk(void)
|
||||
{
|
||||
if (!gd->timer) {
|
||||
#ifdef CONFIG_TIMER_EARLY
|
||||
return timer_early_get_rate();
|
||||
#else
|
||||
int ret;
|
||||
|
||||
if (IS_ENABLED(CONFIG_TIMER_EARLY))
|
||||
return timer_early_get_rate();
|
||||
|
||||
ret = dm_timer_init();
|
||||
if (ret)
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
return timer_get_rate(gd->timer);
|
||||
|
@ -90,15 +89,14 @@ uint64_t notrace get_ticks(void)
|
|||
int ret;
|
||||
|
||||
if (!gd->timer) {
|
||||
#ifdef CONFIG_TIMER_EARLY
|
||||
return timer_early_get_count();
|
||||
#else
|
||||
int ret;
|
||||
|
||||
if (IS_ENABLED(CONFIG_TIMER_EARLY))
|
||||
return timer_early_get_count();
|
||||
|
||||
ret = dm_timer_init();
|
||||
if (ret)
|
||||
panic("Could not initialize timer (err %d)\n", ret);
|
||||
#endif
|
||||
}
|
||||
|
||||
ret = timer_get_count(gd->timer, &count);
|
||||
|
|
93
lib/trace.c
93
lib/trace.c
|
@ -35,9 +35,11 @@ struct trace_hdr {
|
|||
ulong ftrace_count; /* Num. of ftrace records written */
|
||||
ulong ftrace_too_deep_count; /* Functions that were too deep */
|
||||
|
||||
int depth;
|
||||
int depth_limit;
|
||||
int max_depth;
|
||||
int depth; /* Depth of function calls */
|
||||
int depth_limit; /* Depth limit to trace to */
|
||||
int max_depth; /* Maximum depth seen so far */
|
||||
int min_depth; /* Minimum depth seen so far */
|
||||
bool trace_locked; /* Used to detect recursive tracing */
|
||||
};
|
||||
|
||||
/* Pointer to start of trace buffer */
|
||||
|
@ -118,18 +120,6 @@ static void notrace add_ftrace(void *func_ptr, void *caller, ulong flags)
|
|||
hdr->ftrace_count++;
|
||||
}
|
||||
|
||||
static void notrace add_textbase(void)
|
||||
{
|
||||
if (hdr->ftrace_count < hdr->ftrace_size) {
|
||||
struct trace_call *rec = &hdr->ftrace[hdr->ftrace_count];
|
||||
|
||||
rec->func = CONFIG_TEXT_BASE;
|
||||
rec->caller = 0;
|
||||
rec->flags = FUNCF_TEXTBASE;
|
||||
}
|
||||
hdr->ftrace_count++;
|
||||
}
|
||||
|
||||
/**
|
||||
* __cyg_profile_func_enter() - record function entry
|
||||
*
|
||||
|
@ -144,6 +134,14 @@ void notrace __cyg_profile_func_enter(void *func_ptr, void *caller)
|
|||
if (trace_enabled) {
|
||||
int func;
|
||||
|
||||
if (hdr->trace_locked) {
|
||||
trace_enabled = 0;
|
||||
puts("trace: recursion detected, disabling\n");
|
||||
hdr->trace_locked = false;
|
||||
return;
|
||||
}
|
||||
|
||||
hdr->trace_locked = true;
|
||||
trace_swap_gd();
|
||||
add_ftrace(func_ptr, caller, FUNCF_ENTRY);
|
||||
func = func_ptr_to_num(func_ptr);
|
||||
|
@ -154,9 +152,10 @@ void notrace __cyg_profile_func_enter(void *func_ptr, void *caller)
|
|||
hdr->untracked_count++;
|
||||
}
|
||||
hdr->depth++;
|
||||
if (hdr->depth > hdr->depth_limit)
|
||||
if (hdr->depth > hdr->max_depth)
|
||||
hdr->max_depth = hdr->depth;
|
||||
trace_swap_gd();
|
||||
hdr->trace_locked = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -170,8 +169,10 @@ void notrace __cyg_profile_func_exit(void *func_ptr, void *caller)
|
|||
{
|
||||
if (trace_enabled) {
|
||||
trace_swap_gd();
|
||||
add_ftrace(func_ptr, caller, FUNCF_EXIT);
|
||||
hdr->depth--;
|
||||
add_ftrace(func_ptr, caller, FUNCF_EXIT);
|
||||
if (hdr->depth < hdr->min_depth)
|
||||
hdr->min_depth = hdr->depth;
|
||||
trace_swap_gd();
|
||||
}
|
||||
}
|
||||
|
@ -278,8 +279,11 @@ int trace_list_calls(void *buff, size_t buff_size, size_t *needed)
|
|||
|
||||
/* Update the header */
|
||||
if (output_hdr) {
|
||||
memset(output_hdr, '\0', sizeof(*output_hdr));
|
||||
output_hdr->rec_count = upto;
|
||||
output_hdr->type = TRACE_CHUNK_CALLS;
|
||||
output_hdr->version = TRACE_VERSION;
|
||||
output_hdr->text_base = CONFIG_TEXT_BASE;
|
||||
}
|
||||
|
||||
/* Work out how must of the buffer we used */
|
||||
|
@ -318,11 +322,17 @@ void trace_print_stats(void)
|
|||
printf(" (%lu dropped due to overflow)",
|
||||
hdr->ftrace_count - hdr->ftrace_size);
|
||||
}
|
||||
puts("\n");
|
||||
printf("%15d maximum observed call depth\n", hdr->max_depth);
|
||||
|
||||
/* Add in minimum depth since the trace did not start at top level */
|
||||
printf("\n%15d maximum observed call depth\n",
|
||||
hdr->max_depth - hdr->min_depth);
|
||||
printf("%15d call depth limit\n", hdr->depth_limit);
|
||||
print_grouped_ull(hdr->ftrace_too_deep_count, 10);
|
||||
puts(" calls not traced due to depth\n");
|
||||
print_grouped_ull(hdr->ftrace_size, 10);
|
||||
puts(" max function calls\n");
|
||||
printf("\ntrace buffer %lx call records %lx\n",
|
||||
(ulong)map_to_sysmem(hdr), (ulong)map_to_sysmem(hdr->ftrace));
|
||||
}
|
||||
|
||||
void notrace trace_set_enabled(int enabled)
|
||||
|
@ -330,6 +340,17 @@ void notrace trace_set_enabled(int enabled)
|
|||
trace_enabled = enabled != 0;
|
||||
}
|
||||
|
||||
static int get_func_count(void)
|
||||
{
|
||||
/* Detect no support for mon_len since this means tracing cannot work */
|
||||
if (IS_ENABLED(CONFIG_SANDBOX) && !gd->mon_len) {
|
||||
puts("Tracing is not supported on this board\n");
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
return gd->mon_len / FUNC_SITE_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* trace_init() - initialize the tracing system and enable it
|
||||
*
|
||||
|
@ -339,16 +360,18 @@ void notrace trace_set_enabled(int enabled)
|
|||
*/
|
||||
int notrace trace_init(void *buff, size_t buff_size)
|
||||
{
|
||||
ulong func_count = gd->mon_len / FUNC_SITE_SIZE;
|
||||
int func_count = get_func_count();
|
||||
size_t needed;
|
||||
int was_disabled = !trace_enabled;
|
||||
|
||||
if (func_count < 0)
|
||||
return func_count;
|
||||
trace_save_gd();
|
||||
|
||||
if (!was_disabled) {
|
||||
#ifdef CONFIG_TRACE_EARLY
|
||||
ulong used, count;
|
||||
char *end;
|
||||
ulong used;
|
||||
|
||||
/*
|
||||
* Copy over the early trace data if we have it. Disable
|
||||
|
@ -357,12 +380,19 @@ int notrace trace_init(void *buff, size_t buff_size)
|
|||
trace_enabled = 0;
|
||||
hdr = map_sysmem(CONFIG_TRACE_EARLY_ADDR,
|
||||
CONFIG_TRACE_EARLY_SIZE);
|
||||
end = (char *)&hdr->ftrace[min(hdr->ftrace_count,
|
||||
hdr->ftrace_size)];
|
||||
count = min(hdr->ftrace_count, hdr->ftrace_size);
|
||||
end = (char *)&hdr->ftrace[count];
|
||||
used = end - (char *)hdr;
|
||||
printf("trace: copying %08lx bytes of early data from %x to %08lx\n",
|
||||
used, CONFIG_TRACE_EARLY_ADDR,
|
||||
(ulong)map_to_sysmem(buff));
|
||||
printf("%lu traced function calls", count);
|
||||
if (hdr->ftrace_count > hdr->ftrace_size) {
|
||||
printf(" (%lu dropped due to overflow)",
|
||||
hdr->ftrace_count - hdr->ftrace_size);
|
||||
hdr->ftrace_count = hdr->ftrace_size;
|
||||
}
|
||||
puts("\n");
|
||||
memcpy(buff, hdr, used);
|
||||
#else
|
||||
puts("trace: already enabled\n");
|
||||
|
@ -372,23 +402,24 @@ int notrace trace_init(void *buff, size_t buff_size)
|
|||
hdr = (struct trace_hdr *)buff;
|
||||
needed = sizeof(*hdr) + func_count * sizeof(uintptr_t);
|
||||
if (needed > buff_size) {
|
||||
printf("trace: buffer size %zd bytes: at least %zd needed\n",
|
||||
printf("trace: buffer size %zx bytes: at least %zx needed\n",
|
||||
buff_size, needed);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (was_disabled)
|
||||
if (was_disabled) {
|
||||
memset(hdr, '\0', needed);
|
||||
hdr->min_depth = INT_MAX;
|
||||
}
|
||||
hdr->func_count = func_count;
|
||||
hdr->call_accum = (uintptr_t *)(hdr + 1);
|
||||
|
||||
/* Use any remaining space for the timed function trace */
|
||||
hdr->ftrace = (struct trace_call *)(buff + needed);
|
||||
hdr->ftrace_size = (buff_size - needed) / sizeof(*hdr->ftrace);
|
||||
add_textbase();
|
||||
hdr->depth_limit = CONFIG_TRACE_CALL_DEPTH_LIMIT;
|
||||
|
||||
puts("trace: enabled\n");
|
||||
hdr->depth_limit = CONFIG_TRACE_CALL_DEPTH_LIMIT;
|
||||
trace_enabled = 1;
|
||||
trace_inited = 1;
|
||||
|
||||
|
@ -403,10 +434,12 @@ int notrace trace_init(void *buff, size_t buff_size)
|
|||
*/
|
||||
int notrace trace_early_init(void)
|
||||
{
|
||||
ulong func_count = gd->mon_len / FUNC_SITE_SIZE;
|
||||
int func_count = get_func_count();
|
||||
size_t buff_size = CONFIG_TRACE_EARLY_SIZE;
|
||||
size_t needed;
|
||||
|
||||
if (func_count < 0)
|
||||
return func_count;
|
||||
/* We can ignore additional calls to this function */
|
||||
if (trace_enabled)
|
||||
return 0;
|
||||
|
@ -414,7 +447,7 @@ int notrace trace_early_init(void)
|
|||
hdr = map_sysmem(CONFIG_TRACE_EARLY_ADDR, CONFIG_TRACE_EARLY_SIZE);
|
||||
needed = sizeof(*hdr) + func_count * sizeof(uintptr_t);
|
||||
if (needed > buff_size) {
|
||||
printf("trace: buffer size is %zd bytes, at least %zd needed\n",
|
||||
printf("trace: buffer size is %zx bytes, at least %zx needed\n",
|
||||
buff_size, needed);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
@ -422,11 +455,11 @@ int notrace trace_early_init(void)
|
|||
memset(hdr, '\0', needed);
|
||||
hdr->call_accum = (uintptr_t *)(hdr + 1);
|
||||
hdr->func_count = func_count;
|
||||
hdr->min_depth = INT_MAX;
|
||||
|
||||
/* Use any remaining space for the timed function trace */
|
||||
hdr->ftrace = (struct trace_call *)((char *)hdr + needed);
|
||||
hdr->ftrace_size = (buff_size - needed) / sizeof(*hdr->ftrace);
|
||||
add_textbase();
|
||||
hdr->depth_limit = CONFIG_TRACE_EARLY_CALL_DEPTH_LIMIT;
|
||||
printf("trace: early enable at %08x\n", CONFIG_TRACE_EARLY_ADDR);
|
||||
|
||||
|
|
304
test/py/tests/test_trace.py
Normal file
304
test/py/tests/test_trace.py
Normal file
|
@ -0,0 +1,304 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
# Copyright 2022 Google LLC
|
||||
# Written by Simon Glass <sjg@chromium.org>
|
||||
|
||||
import os
|
||||
import pytest
|
||||
import re
|
||||
|
||||
import u_boot_utils as util
|
||||
|
||||
# This is needed for Azure, since the default '..' directory is not writeable
|
||||
TMPDIR = '/tmp/test_trace'
|
||||
|
||||
# Decode a function-graph line
|
||||
RE_LINE = re.compile(r'.*\[000\]\s*([0-9.]*): func.*[|](\s*)(\S.*)?([{};])$')
|
||||
|
||||
|
||||
def collect_trace(cons):
|
||||
"""Build U-Boot and run it to collect a trace
|
||||
|
||||
Args:
|
||||
cons (ConsoleBase): U-Boot console
|
||||
|
||||
Returns:
|
||||
tuple:
|
||||
str: Filename of the output trace file
|
||||
int: Microseconds taken for initf_dm according to bootstage
|
||||
"""
|
||||
cons.run_command('trace pause')
|
||||
out = cons.run_command('trace stats')
|
||||
|
||||
# The output is something like this:
|
||||
# 251,003 function sites
|
||||
# 1,160,283 function calls
|
||||
# 0 untracked function calls
|
||||
# 1,230,758 traced function calls (341538 dropped due to overflow)
|
||||
# 33 maximum observed call depth
|
||||
# 15 call depth limit
|
||||
# 748,268 calls not traced due to depth
|
||||
# 1,230,758 max function calls
|
||||
|
||||
# Get a dict of values from the output
|
||||
lines = [line.split(maxsplit=1) for line in out.splitlines() if line]
|
||||
vals = {key: val.replace(',', '') for val, key in lines}
|
||||
|
||||
assert int(vals['function sites']) > 100000
|
||||
assert int(vals['function calls']) > 200000
|
||||
assert int(vals['untracked function calls']) == 0
|
||||
assert int(vals['maximum observed call depth']) > 30
|
||||
assert (vals['call depth limit'] ==
|
||||
cons.config.buildconfig.get('config_trace_call_depth_limit'))
|
||||
assert int(vals['calls not traced due to depth']) > 100000
|
||||
|
||||
out = cons.run_command('bootstage report')
|
||||
# Accumulated time:
|
||||
# 19,104 dm_r
|
||||
# 23,078 of_live
|
||||
# 46,280 dm_f
|
||||
dm_f_time = [line.split()[0] for line in out.replace(',', '').splitlines()
|
||||
if 'dm_f' in line]
|
||||
|
||||
# Read out the trace data
|
||||
addr = 0x02000000
|
||||
size = 0x01000000
|
||||
out = cons.run_command(f'trace calls {addr:x} {size:x}')
|
||||
print(out)
|
||||
fname = os.path.join(TMPDIR, 'trace')
|
||||
out = cons.run_command(
|
||||
'host save hostfs - %x %s ${profoffset}' % (addr, fname))
|
||||
return fname, int(dm_f_time[0])
|
||||
|
||||
|
||||
def check_function(cons, fname, proftool, map_fname, trace_dat):
|
||||
"""Check that the 'function' output works
|
||||
|
||||
Args:
|
||||
cons (ConsoleBase): U-Boot console
|
||||
fname (str): Filename of trace file
|
||||
proftool (str): Filename of proftool
|
||||
map_fname (str): Filename of System.map
|
||||
trace_dat (str): Filename of output file
|
||||
"""
|
||||
out = util.run_and_log(
|
||||
cons, [proftool, '-t', fname, '-o', trace_dat, '-m', map_fname,
|
||||
'dump-ftrace'])
|
||||
|
||||
# Check that trace-cmd can read it
|
||||
out = util.run_and_log(cons, ['trace-cmd', 'dump', trace_dat])
|
||||
|
||||
# Tracing meta data in file /tmp/test_trace/trace.dat:
|
||||
# [Initial format]
|
||||
# 6 [Version]
|
||||
# 0 [Little endian]
|
||||
# 4 [Bytes in a long]
|
||||
# 4096 [Page size, bytes]
|
||||
# [Header page, 205 bytes]
|
||||
# [Header event, 205 bytes]
|
||||
# [Ftrace format, 3 events]
|
||||
# [Events format, 0 systems]
|
||||
# [Kallsyms, 342244 bytes]
|
||||
# [Trace printk, 0 bytes]
|
||||
# [Saved command lines, 9 bytes]
|
||||
# 1 [CPUs with tracing data]
|
||||
# [6 options]
|
||||
# [Flyrecord tracing data]
|
||||
# [Tracing clock]
|
||||
# [local] global counter uptime perf mono mono_raw boot x86-tsc
|
||||
assert '[Flyrecord tracing data]' in out
|
||||
assert '4096 [Page size, bytes]' in out
|
||||
kallsyms = [line.split() for line in out.splitlines() if 'Kallsyms' in line]
|
||||
# [['[Kallsyms,', '342244', 'bytes]']]
|
||||
val = int(kallsyms[0][1])
|
||||
assert val > 50000 # Should be at least 50KB of symbols
|
||||
|
||||
# Check that the trace has something useful
|
||||
cmd = f"trace-cmd report {trace_dat} |grep -E '(initf_|initr_)'"
|
||||
out = util.run_and_log(cons, ['sh', '-c', cmd])
|
||||
|
||||
# Format:
|
||||
# unknown option 14
|
||||
# u-boot-1 [000] 60.805596: function: initf_malloc
|
||||
# u-boot-1 [000] 60.805597: function: initf_malloc
|
||||
# u-boot-1 [000] 60.805601: function: initf_bootstage
|
||||
# u-boot-1 [000] 60.805607: function: initf_bootstage
|
||||
lines = [line.replace(':', '').split() for line in out.splitlines()]
|
||||
vals = {items[4]: float(items[2]) for items in lines if len(items) == 5}
|
||||
base = None
|
||||
max_delta = 0
|
||||
for timestamp in vals.values():
|
||||
if base:
|
||||
max_delta = max(max_delta, timestamp - base)
|
||||
else:
|
||||
base = timestamp
|
||||
|
||||
# Check for some expected functions
|
||||
assert 'initf_malloc' in vals.keys()
|
||||
assert 'initr_watchdog' in vals.keys()
|
||||
assert 'initr_dm' in vals.keys()
|
||||
|
||||
# All the functions should be executed within five seconds at most
|
||||
assert max_delta < 5
|
||||
|
||||
|
||||
def check_funcgraph(cons, fname, proftool, map_fname, trace_dat):
|
||||
"""Check that the 'funcgraph' output works
|
||||
|
||||
Args:
|
||||
cons (ConsoleBase): U-Boot console
|
||||
fname (str): Filename of trace file
|
||||
proftool (str): Filename of proftool
|
||||
map_fname (str): Filename of System.map
|
||||
trace_dat (str): Filename of output file
|
||||
|
||||
Returns:
|
||||
int: Time taken by the first part of the initf_dm() function, in us
|
||||
"""
|
||||
|
||||
# Generate the funcgraph format
|
||||
out = util.run_and_log(
|
||||
cons, [proftool, '-t', fname, '-o', trace_dat, '-m', map_fname,
|
||||
'dump-ftrace', '-f', 'funcgraph'])
|
||||
|
||||
# Check that the trace has what we expect
|
||||
cmd = f'trace-cmd report {trace_dat} |head -n 70'
|
||||
out = util.run_and_log(cons, ['sh', '-c', cmd])
|
||||
|
||||
# First look for this:
|
||||
# u-boot-1 [000] 282.101360: funcgraph_entry: 0.004 us | initf_malloc();
|
||||
# ...
|
||||
# u-boot-1 [000] 282.101369: funcgraph_entry: | initf_bootstage() {
|
||||
# u-boot-1 [000] 282.101369: funcgraph_entry: | bootstage_init() {
|
||||
# u-boot-1 [000] 282.101369: funcgraph_entry: | dlmalloc() {
|
||||
# ...
|
||||
# u-boot-1 [000] 282.101375: funcgraph_exit: 0.001 us | }
|
||||
# Then look for this:
|
||||
# u-boot-1 [000] 282.101375: funcgraph_exit: 0.006 us | }
|
||||
# Then check for this:
|
||||
# u-boot-1 [000] 282.101375: funcgraph_entry: 0.000 us | event_init();
|
||||
|
||||
expected_indent = None
|
||||
found_start = False
|
||||
found_end = False
|
||||
upto = None
|
||||
|
||||
# Look for initf_bootstage() entry and make sure we see the exit
|
||||
# Collect the time for initf_dm()
|
||||
for line in out.splitlines():
|
||||
m = RE_LINE.match(line)
|
||||
if m:
|
||||
timestamp, indent, func, brace = m.groups()
|
||||
if found_end:
|
||||
upto = func
|
||||
break
|
||||
elif func == 'initf_bootstage() ':
|
||||
found_start = True
|
||||
expected_indent = indent + ' '
|
||||
elif found_start and indent == expected_indent and brace == '}':
|
||||
found_end = True
|
||||
|
||||
# The next function after initf_bootstage() exits should be event_init()
|
||||
assert upto == 'event_init()'
|
||||
|
||||
# Now look for initf_dm() and dm_timer_init() so we can check the bootstage
|
||||
# time
|
||||
cmd = f"trace-cmd report {trace_dat} |grep -E '(initf_dm|dm_timer_init)'"
|
||||
out = util.run_and_log(cons, ['sh', '-c', cmd])
|
||||
|
||||
start_timestamp = None
|
||||
end_timestamp = None
|
||||
for line in out.splitlines():
|
||||
m = RE_LINE.match(line)
|
||||
if m:
|
||||
timestamp, indent, func, brace = m.groups()
|
||||
if func == 'initf_dm() ':
|
||||
start_timestamp = timestamp
|
||||
elif func == 'dm_timer_init() ':
|
||||
end_timestamp = timestamp
|
||||
break
|
||||
assert start_timestamp and end_timestamp
|
||||
|
||||
# Convert the time to microseconds
|
||||
return int((float(end_timestamp) - float(start_timestamp)) * 1000000)
|
||||
|
||||
|
||||
def check_flamegraph(cons, fname, proftool, map_fname, trace_fg):
|
||||
"""Check that the 'flamegraph' output works
|
||||
|
||||
This spot checks a few call counts and estimates the time taken by the
|
||||
initf_dm() function
|
||||
|
||||
Args:
|
||||
cons (ConsoleBase): U-Boot console
|
||||
fname (str): Filename of trace file
|
||||
proftool (str): Filename of proftool
|
||||
map_fname (str): Filename of System.map
|
||||
trace_fg (str): Filename of output file
|
||||
|
||||
Returns:
|
||||
int: Approximate number of microseconds used by the initf_dm() function
|
||||
"""
|
||||
|
||||
# Generate the flamegraph format
|
||||
out = util.run_and_log(
|
||||
cons, [proftool, '-t', fname, '-o', trace_fg, '-m', map_fname,
|
||||
'dump-flamegraph'])
|
||||
|
||||
# We expect dm_timer_init() to be called twice: once before relocation and
|
||||
# once after
|
||||
look1 = 'initf_dm;dm_timer_init 1'
|
||||
look2 = 'board_init_r;initr_dm_devices;dm_timer_init 1'
|
||||
found = 0
|
||||
with open(trace_fg, 'r') as fd:
|
||||
for line in fd:
|
||||
line = line.strip()
|
||||
if line == look1 or line == look2:
|
||||
found += 1
|
||||
assert found == 2
|
||||
|
||||
# Generate the timing graph
|
||||
out = util.run_and_log(
|
||||
cons, [proftool, '-t', fname, '-o', trace_fg, '-m', map_fname,
|
||||
'dump-flamegraph', '-f', 'timing'])
|
||||
|
||||
# Add up all the time spend in initf_dm() and its children
|
||||
total = 0
|
||||
with open(trace_fg, 'r') as fd:
|
||||
for line in fd:
|
||||
line = line.strip()
|
||||
if line.startswith('initf_dm'):
|
||||
func, val = line.split()
|
||||
count = int(val)
|
||||
total += count
|
||||
return total
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.boardspec('sandbox')
|
||||
@pytest.mark.buildconfigspec('trace')
|
||||
def test_trace(u_boot_console):
|
||||
"""Test we can build sandbox with trace, collect and process a trace"""
|
||||
cons = u_boot_console
|
||||
|
||||
if not os.path.exists(TMPDIR):
|
||||
os.mkdir(TMPDIR)
|
||||
proftool = os.path.join(cons.config.build_dir, 'tools', 'proftool')
|
||||
map_fname = os.path.join(cons.config.build_dir, 'System.map')
|
||||
trace_dat = os.path.join(TMPDIR, 'trace.dat')
|
||||
trace_fg = os.path.join(TMPDIR, 'trace.fg')
|
||||
|
||||
fname, dm_f_time = collect_trace(cons)
|
||||
|
||||
check_function(cons, fname, proftool, map_fname, trace_dat)
|
||||
trace_time = check_funcgraph(cons, fname, proftool, map_fname, trace_dat)
|
||||
|
||||
# Check that bootstage and funcgraph agree to within 10 microseconds
|
||||
diff = abs(trace_time - dm_f_time)
|
||||
print(f'trace_time {trace_time}, dm_f_time {dm_f_time}')
|
||||
assert diff / dm_f_time < 0.01
|
||||
|
||||
fg_time = check_flamegraph(cons, fname, proftool, map_fname, trace_fg)
|
||||
|
||||
# Check that bootstage and flamegraph agree to within 10%
|
||||
diff = abs(fg_time - dm_f_time)
|
||||
assert diff / dm_f_time < 0.1
|
|
@ -215,7 +215,10 @@ hostprogs-$(CONFIG_NETCONSOLE) += ncb
|
|||
|
||||
hostprogs-$(CONFIG_ARCH_KIRKWOOD) += kwboot
|
||||
hostprogs-$(CONFIG_ARCH_MVEBU) += kwboot
|
||||
|
||||
hostprogs-y += proftool
|
||||
proftool-objs = proftool.o lib/abuf.o
|
||||
|
||||
hostprogs-$(CONFIG_STATIC_RELA) += relocate-rela
|
||||
hostprogs-$(CONFIG_RISCV) += prelink-riscv
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# This Dockerfile is used to build an image containing basic stuff to be used
|
||||
# to build U-Boot and run our test suites.
|
||||
|
||||
FROM ubuntu:jammy-20221130
|
||||
FROM ubuntu:jammy-20230126
|
||||
MAINTAINER Tom Rini <trini@konsulko.com>
|
||||
LABEL Description=" This image is for building U-Boot inside a container"
|
||||
|
||||
|
@ -173,7 +173,7 @@ RUN git clone git://git.savannah.gnu.org/grub.git /tmp/grub && \
|
|||
true && \
|
||||
rm -rf /tmp/grub
|
||||
|
||||
RUN git clone git://git.qemu.org/qemu.git /tmp/qemu && \
|
||||
RUN git clone https://gitlab.com/qemu-project/qemu.git /tmp/qemu && \
|
||||
cd /tmp/qemu && \
|
||||
git submodule update --init dtc && \
|
||||
git checkout v6.1.0 && \
|
||||
|
@ -213,6 +213,22 @@ RUN git clone https://github.com/stefanberger/swtpm /tmp/swtpm && \
|
|||
make install && \
|
||||
rm -rf /tmp/swtpm
|
||||
|
||||
# Build trace-cmd
|
||||
RUN mkdir /tmp/trace && \
|
||||
git clone https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git /tmp/trace/libtraceevent && \
|
||||
cd /tmp/trace/libtraceevent && \
|
||||
make -j$(nproc) && \
|
||||
sudo make install && \
|
||||
git clone https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git /tmp/trace/libtracefs && \
|
||||
cd /tmp/trace/libtracefs && \
|
||||
make -j$(nproc) && \
|
||||
sudo make install && \
|
||||
git clone https://github.com/rostedt/trace-cmd.git /tmp/trace/trace-cmd && \
|
||||
cd /tmp/trace/trace-cmd && \
|
||||
make -j$(nproc) && \
|
||||
sudo make install && \
|
||||
rm -rf /tmp/trace
|
||||
|
||||
# Create our user/group
|
||||
RUN echo uboot ALL=NOPASSWD: ALL > /etc/sudoers.d/uboot
|
||||
RUN useradd -m -U uboot
|
||||
|
|
1602
tools/proftool.c
1602
tools/proftool.c
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue