mirror of
https://github.com/AsahiLinux/u-boot
synced 2024-11-10 15:14:43 +00:00
arm: Switch to -mno-unaligned-access when supported by the compiler
When we tell the compiler to optimize for ARMv7 (and ARMv6 for that matter) it assumes a default of SCTRL.A being cleared and unaligned accesses being allowed and fast at the hardware level. We set this bit and must pass along -mno-unaligned-access so that the compiler will still breakdown accesses and not trigger a data abort. To better help understand the requirements of the project with respect to unaligned memory access, the Documentation/unaligned-memory-access.txt file has been added as doc/README.unaligned-memory-access.txt and is taken from the v3.14-rc1 tag of the kernel. Cc: Albert ARIBAUD <albert.u.boot@aribaud.net> Cc: Mans Rullgard <mans@mansr.com> Signed-off-by: Tom Rini <trini@ti.com>
This commit is contained in:
parent
f503cc49a5
commit
1551df35f2
9 changed files with 248 additions and 138 deletions
2
README
2
README
|
@ -1726,7 +1726,7 @@ CBFS (Coreboot Filesystem) support
|
|||
|
||||
If this option is set, then U-Boot will prevent the environment
|
||||
variable "splashimage" from being set to a problematic address
|
||||
(see README.displaying-bmps and README.arm-unaligned-accesses).
|
||||
(see README.displaying-bmps).
|
||||
This option is useful for targets where, due to alignment
|
||||
restrictions, an improperly aligned BMP image will cause a data
|
||||
abort. If you think you will not have problems with unaligned
|
||||
|
|
|
@ -10,9 +10,12 @@
|
|||
PF_CPPFLAGS_ARMV7 := $(call cc-option, -march=armv7-a, -march=armv5)
|
||||
PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV7)
|
||||
|
||||
# SEE README.arm-unaligned-accesses
|
||||
# On supported platforms we set the bit which causes us to trap on unaligned
|
||||
# memory access. This is the opposite of what the compiler expects to be
|
||||
# the default so we must pass in -mno-unaligned-access so that it is aware
|
||||
# of our decision.
|
||||
PF_NO_UNALIGNED := $(call cc-option, -mno-unaligned-access,)
|
||||
PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
|
||||
PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
|
||||
|
||||
ifneq ($(CONFIG_IMX_CONFIG),)
|
||||
ifdef CONFIG_SPL
|
||||
|
|
|
@ -6,10 +6,7 @@
|
|||
#
|
||||
PLATFORM_RELFLAGS += -fno-common -ffixed-x18
|
||||
|
||||
# SEE README.arm-unaligned-accesses
|
||||
PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
|
||||
PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
|
||||
|
||||
PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
|
||||
PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
|
||||
PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
|
||||
PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
|
||||
|
|
|
@ -153,7 +153,7 @@ void do_prefetch_abort (struct pt_regs *pt_regs)
|
|||
|
||||
void do_data_abort (struct pt_regs *pt_regs)
|
||||
{
|
||||
printf ("data abort\n\n MAYBE you should read doc/README.arm-unaligned-accesses\n\n");
|
||||
printf ("data abort\n");
|
||||
show_regs (pt_regs);
|
||||
bad_mode ();
|
||||
}
|
||||
|
|
|
@ -239,5 +239,3 @@ obj-y += memsize.o
|
|||
obj-y += stdio.o
|
||||
|
||||
CFLAGS_env_embedded.o := -Wa,--no-warn -DENV_CRC=$(shell tools/envcrc 2>/dev/null)
|
||||
CFLAGS_hush.o := $(PLATFORM_NO_UNALIGNED)
|
||||
CFLAGS_fdt_support.o := $(PLATFORM_NO_UNALIGNED)
|
||||
|
|
|
@ -1,122 +0,0 @@
|
|||
If you are reading this because of a data abort: the following MIGHT
|
||||
be relevant to your abort, if it was caused by an alignment violation.
|
||||
In order to determine this, use the PC from the abort dump along with
|
||||
an objdump -s -S of the u-boot ELF binary to locate the function where
|
||||
the abort happened; then compare this function with the examples below.
|
||||
If they match, then you've been hit with a compiler generated unaligned
|
||||
access, and you should rewrite your code or add -mno-unaligned-access
|
||||
to the command line of the offending file.
|
||||
|
||||
Note that the PC shown in the abort message is relocated. In order to
|
||||
be able to match it to an address in the ELF binary dump, you will need
|
||||
to know the relocation offset. If your target defines CONFIG_CMD_BDI
|
||||
and if you can get to the prompt and enter commands before the abort
|
||||
happens, then command "bdinfo" will give you the offset. Otherwise you
|
||||
will need to try a build with DEBUG set, which will display the offset,
|
||||
or use a debugger and set a breakpoint at relocate_code() to see the
|
||||
offset (passed as an argument).
|
||||
|
||||
*
|
||||
|
||||
Since U-Boot runs on a variety of hardware, some only able to perform
|
||||
unaligned accesses with a strong penalty, some unable to perform them
|
||||
at all, the policy regarding unaligned accesses is to not perform any,
|
||||
unless absolutely necessary because of hardware or standards.
|
||||
|
||||
Also, on hardware which permits it, the core is configured to throw
|
||||
data abort exceptions on unaligned accesses in order to catch these
|
||||
unallowed accesses as early as possible.
|
||||
|
||||
Until version 4.7, the gcc default for performing unaligned accesses
|
||||
(-mno-unaligned-access) is to emulate unaligned accesses using aligned
|
||||
loads and stores plus shifts and masks. Emulated unaligned accesses
|
||||
will not be caught by hardware. These accesses may be costly and may
|
||||
be actually unnecessary. In order to catch these accesses and remove
|
||||
or optimize them, option -munaligned-access is explicitly set for all
|
||||
versions of gcc which support it.
|
||||
|
||||
From gcc 4.7 onward starting at armv7 architectures, the default for
|
||||
performing unaligned accesses is to use unaligned native loads and
|
||||
stores (-munaligned-access), because the cost of unaligned accesses
|
||||
has dropped on armv7 and beyond. This should not affect U-Boot's
|
||||
policy of controlling unaligned accesses, however the compiler may
|
||||
generate uncontrolled unaligned accesses on its own in at least one
|
||||
known case: when declaring a local initialized char array, e.g.
|
||||
|
||||
function foo()
|
||||
{
|
||||
char buffer[] = "initial value";
|
||||
/* or */
|
||||
char buffer[] = { 'i', 'n', 'i', 't', 0 };
|
||||
...
|
||||
}
|
||||
|
||||
Under -munaligned-accesses with optimizations on, this declaration
|
||||
causes the compiler to generate native loads from the literal string
|
||||
and native stores to the buffer, and the literal string alignment
|
||||
cannot be controlled. If it is misaligned, then the core will throw
|
||||
a data abort exception.
|
||||
|
||||
Quite probably the same might happen for 16-bit array initializations
|
||||
where the constant is aligned on a boundary which is a multiple of 2
|
||||
but not of 4:
|
||||
|
||||
function foo()
|
||||
{
|
||||
u16 buffer[] = { 1, 2, 3 };
|
||||
...
|
||||
}
|
||||
|
||||
The long term solution to this issue is to add an option to gcc to
|
||||
allow controlling the general alignment of data, including constant
|
||||
initialization values.
|
||||
|
||||
However this will only apply to the version of gcc which will have such
|
||||
an option. For other versions, there are four workarounds:
|
||||
|
||||
a) Enforce as a rule that array initializations as described above
|
||||
are forbidden. This is generally not acceptable as they are valid,
|
||||
and usual, C constructs. The only case where they could be rejected
|
||||
is when they actually equate to a const char* declaration, i.e. the
|
||||
array is initialized and never modified in the function's scope.
|
||||
|
||||
b) Drop the requirement on unaligned accesses at least for ARMv7,
|
||||
i.e. do not throw a data abort exception upon unaligned accesses.
|
||||
But that will allow adding badly aligned code to U-Boot, only for
|
||||
it to fail when re-used with a stricter target, possibly once the
|
||||
bad code is already in mainline.
|
||||
|
||||
c) Relax the -munaligned-access rule globally. This will prevent native
|
||||
unaligned accesses of course, but that will also hide any bug caused
|
||||
by a bad unaligned access, making it much harder to diagnose it. It
|
||||
is actually what already happens when building ARM targets with a
|
||||
pre-4.7 gcc, and it may actually already hide some bugs yet unseen
|
||||
until the target gets compiled with -munaligned-access.
|
||||
|
||||
d) Relax the -munaligned-access rule only for for files susceptible to
|
||||
the local initialized array issue and for armv7 architectures and
|
||||
beyond. This minimizes the quantity of code which can hide unwanted
|
||||
misaligned accesses.
|
||||
|
||||
The option retained is d).
|
||||
|
||||
Considering that actual occurrences of the issue are rare (as of this
|
||||
writing, 5 files out of 7840 in U-Boot, or .3%, contain an initialized
|
||||
local char array which cannot actually be replaced with a const char*),
|
||||
contributors should not be required to systematically try and detect
|
||||
the issue in their patches.
|
||||
|
||||
Detecting files susceptible to the issue can be automated through a
|
||||
filter installed as a hook in .git which recognizes local char array
|
||||
initializations. Automation should err on the false positive side, for
|
||||
instance flagging non-local arrays as if they were local if they cannot
|
||||
be told apart.
|
||||
|
||||
In any case, detection shall not prevent committing the patch, but
|
||||
shall pre-populate the commit message with a note to the effect that
|
||||
this patch contains an initialized local char or 16-bit array and thus
|
||||
should be protected from the gcc 4.7 issue.
|
||||
|
||||
Upon a positive detection, either $(PLATFORM_NO_UNALIGNED) should be
|
||||
added to CFLAGS for the affected file(s), or if the array is a pseudo
|
||||
const char*, it should be replaced by an actual one.
|
240
doc/README.unaligned-memory-access.txt
Normal file
240
doc/README.unaligned-memory-access.txt
Normal file
|
@ -0,0 +1,240 @@
|
|||
Editors note: This document is _heavily_ cribbed from the Linux Kernel, with
|
||||
really only the section about "Alignment vs. Networking" removed.
|
||||
|
||||
UNALIGNED MEMORY ACCESSES
|
||||
=========================
|
||||
|
||||
Linux runs on a wide variety of architectures which have varying behaviour
|
||||
when it comes to memory access. This document presents some details about
|
||||
unaligned accesses, why you need to write code that doesn't cause them,
|
||||
and how to write such code!
|
||||
|
||||
|
||||
The definition of an unaligned access
|
||||
=====================================
|
||||
|
||||
Unaligned memory accesses occur when you try to read N bytes of data starting
|
||||
from an address that is not evenly divisible by N (i.e. addr % N != 0).
|
||||
For example, reading 4 bytes of data from address 0x10004 is fine, but
|
||||
reading 4 bytes of data from address 0x10005 would be an unaligned memory
|
||||
access.
|
||||
|
||||
The above may seem a little vague, as memory access can happen in different
|
||||
ways. The context here is at the machine code level: certain instructions read
|
||||
or write a number of bytes to or from memory (e.g. movb, movw, movl in x86
|
||||
assembly). As will become clear, it is relatively easy to spot C statements
|
||||
which will compile to multiple-byte memory access instructions, namely when
|
||||
dealing with types such as u16, u32 and u64.
|
||||
|
||||
|
||||
Natural alignment
|
||||
=================
|
||||
|
||||
The rule mentioned above forms what we refer to as natural alignment:
|
||||
When accessing N bytes of memory, the base memory address must be evenly
|
||||
divisible by N, i.e. addr % N == 0.
|
||||
|
||||
When writing code, assume the target architecture has natural alignment
|
||||
requirements.
|
||||
|
||||
In reality, only a few architectures require natural alignment on all sizes
|
||||
of memory access. However, we must consider ALL supported architectures;
|
||||
writing code that satisfies natural alignment requirements is the easiest way
|
||||
to achieve full portability.
|
||||
|
||||
|
||||
Why unaligned access is bad
|
||||
===========================
|
||||
|
||||
The effects of performing an unaligned memory access vary from architecture
|
||||
to architecture. It would be easy to write a whole document on the differences
|
||||
here; a summary of the common scenarios is presented below:
|
||||
|
||||
- Some architectures are able to perform unaligned memory accesses
|
||||
transparently, but there is usually a significant performance cost.
|
||||
- Some architectures raise processor exceptions when unaligned accesses
|
||||
happen. The exception handler is able to correct the unaligned access,
|
||||
at significant cost to performance.
|
||||
- Some architectures raise processor exceptions when unaligned accesses
|
||||
happen, but the exceptions do not contain enough information for the
|
||||
unaligned access to be corrected.
|
||||
- Some architectures are not capable of unaligned memory access, but will
|
||||
silently perform a different memory access to the one that was requested,
|
||||
resulting in a subtle code bug that is hard to detect!
|
||||
|
||||
It should be obvious from the above that if your code causes unaligned
|
||||
memory accesses to happen, your code will not work correctly on certain
|
||||
platforms and will cause performance problems on others.
|
||||
|
||||
|
||||
Code that does not cause unaligned access
|
||||
=========================================
|
||||
|
||||
At first, the concepts above may seem a little hard to relate to actual
|
||||
coding practice. After all, you don't have a great deal of control over
|
||||
memory addresses of certain variables, etc.
|
||||
|
||||
Fortunately things are not too complex, as in most cases, the compiler
|
||||
ensures that things will work for you. For example, take the following
|
||||
structure:
|
||||
|
||||
struct foo {
|
||||
u16 field1;
|
||||
u32 field2;
|
||||
u8 field3;
|
||||
};
|
||||
|
||||
Let us assume that an instance of the above structure resides in memory
|
||||
starting at address 0x10000. With a basic level of understanding, it would
|
||||
not be unreasonable to expect that accessing field2 would cause an unaligned
|
||||
access. You'd be expecting field2 to be located at offset 2 bytes into the
|
||||
structure, i.e. address 0x10002, but that address is not evenly divisible
|
||||
by 4 (remember, we're reading a 4 byte value here).
|
||||
|
||||
Fortunately, the compiler understands the alignment constraints, so in the
|
||||
above case it would insert 2 bytes of padding in between field1 and field2.
|
||||
Therefore, for standard structure types you can always rely on the compiler
|
||||
to pad structures so that accesses to fields are suitably aligned (assuming
|
||||
you do not cast the field to a type of different length).
|
||||
|
||||
Similarly, you can also rely on the compiler to align variables and function
|
||||
parameters to a naturally aligned scheme, based on the size of the type of
|
||||
the variable.
|
||||
|
||||
At this point, it should be clear that accessing a single byte (u8 or char)
|
||||
will never cause an unaligned access, because all memory addresses are evenly
|
||||
divisible by one.
|
||||
|
||||
On a related topic, with the above considerations in mind you may observe
|
||||
that you could reorder the fields in the structure in order to place fields
|
||||
where padding would otherwise be inserted, and hence reduce the overall
|
||||
resident memory size of structure instances. The optimal layout of the
|
||||
above example is:
|
||||
|
||||
struct foo {
|
||||
u32 field2;
|
||||
u16 field1;
|
||||
u8 field3;
|
||||
};
|
||||
|
||||
For a natural alignment scheme, the compiler would only have to add a single
|
||||
byte of padding at the end of the structure. This padding is added in order
|
||||
to satisfy alignment constraints for arrays of these structures.
|
||||
|
||||
Another point worth mentioning is the use of __attribute__((packed)) on a
|
||||
structure type. This GCC-specific attribute tells the compiler never to
|
||||
insert any padding within structures, useful when you want to use a C struct
|
||||
to represent some data that comes in a fixed arrangement 'off the wire'.
|
||||
|
||||
You might be inclined to believe that usage of this attribute can easily
|
||||
lead to unaligned accesses when accessing fields that do not satisfy
|
||||
architectural alignment requirements. However, again, the compiler is aware
|
||||
of the alignment constraints and will generate extra instructions to perform
|
||||
the memory access in a way that does not cause unaligned access. Of course,
|
||||
the extra instructions obviously cause a loss in performance compared to the
|
||||
non-packed case, so the packed attribute should only be used when avoiding
|
||||
structure padding is of importance.
|
||||
|
||||
|
||||
Code that causes unaligned access
|
||||
=================================
|
||||
|
||||
With the above in mind, let's move onto a real life example of a function
|
||||
that can cause an unaligned memory access. The following function taken
|
||||
from the Linux Kernel's include/linux/etherdevice.h is an optimized routine
|
||||
to compare two ethernet MAC addresses for equality.
|
||||
|
||||
bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
|
||||
{
|
||||
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
|
||||
((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
|
||||
|
||||
return fold == 0;
|
||||
#else
|
||||
const u16 *a = (const u16 *)addr1;
|
||||
const u16 *b = (const u16 *)addr2;
|
||||
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
In the above function, when the hardware has efficient unaligned access
|
||||
capability, there is no issue with this code. But when the hardware isn't
|
||||
able to access memory on arbitrary boundaries, the reference to a[0] causes
|
||||
2 bytes (16 bits) to be read from memory starting at address addr1.
|
||||
|
||||
Think about what would happen if addr1 was an odd address such as 0x10003.
|
||||
(Hint: it'd be an unaligned access.)
|
||||
|
||||
Despite the potential unaligned access problems with the above function, it
|
||||
is included in the kernel anyway but is understood to only work normally on
|
||||
16-bit-aligned addresses. It is up to the caller to ensure this alignment or
|
||||
not use this function at all. This alignment-unsafe function is still useful
|
||||
as it is a decent optimization for the cases when you can ensure alignment,
|
||||
which is true almost all of the time in ethernet networking context.
|
||||
|
||||
|
||||
Here is another example of some code that could cause unaligned accesses:
|
||||
void myfunc(u8 *data, u32 value)
|
||||
{
|
||||
[...]
|
||||
*((u32 *) data) = cpu_to_le32(value);
|
||||
[...]
|
||||
}
|
||||
|
||||
This code will cause unaligned accesses every time the data parameter points
|
||||
to an address that is not evenly divisible by 4.
|
||||
|
||||
In summary, the 2 main scenarios where you may run into unaligned access
|
||||
problems involve:
|
||||
1. Casting variables to types of different lengths
|
||||
2. Pointer arithmetic followed by access to at least 2 bytes of data
|
||||
|
||||
|
||||
Avoiding unaligned accesses
|
||||
===========================
|
||||
|
||||
The easiest way to avoid unaligned access is to use the get_unaligned() and
|
||||
put_unaligned() macros provided by the <asm/unaligned.h> header file.
|
||||
|
||||
Going back to an earlier example of code that potentially causes unaligned
|
||||
access:
|
||||
|
||||
void myfunc(u8 *data, u32 value)
|
||||
{
|
||||
[...]
|
||||
*((u32 *) data) = cpu_to_le32(value);
|
||||
[...]
|
||||
}
|
||||
|
||||
To avoid the unaligned memory access, you would rewrite it as follows:
|
||||
|
||||
void myfunc(u8 *data, u32 value)
|
||||
{
|
||||
[...]
|
||||
value = cpu_to_le32(value);
|
||||
put_unaligned(value, (u32 *) data);
|
||||
[...]
|
||||
}
|
||||
|
||||
The get_unaligned() macro works similarly. Assuming 'data' is a pointer to
|
||||
memory and you wish to avoid unaligned access, its usage is as follows:
|
||||
|
||||
u32 value = get_unaligned((u32 *) data);
|
||||
|
||||
These macros work for memory accesses of any length (not just 32 bits as
|
||||
in the examples above). Be aware that when compared to standard access of
|
||||
aligned memory, using these macros to access unaligned memory can be costly in
|
||||
terms of performance.
|
||||
|
||||
If use of such macros is not convenient, another option is to use memcpy(),
|
||||
where the source or destination (or both) are of type u8* or unsigned char*.
|
||||
Due to the byte-wise nature of this operation, unaligned accesses are avoided.
|
||||
|
||||
--
|
||||
In the Linux Kernel,
|
||||
Authors: Daniel Drake <dsd@gentoo.org>,
|
||||
Johannes Berg <johannes@sipsolutions.net>
|
||||
With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
|
||||
Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
|
||||
Vadim Lobanov
|
|
@ -13,6 +13,3 @@ obj-y := ubifs.o io.o super.o sb.o master.o lpt.o
|
|||
obj-y += lpt_commit.o scan.o lprops.o
|
||||
obj-y += tnc.o tnc_misc.o debug.o crc16.o budget.o
|
||||
obj-y += log.o orphan.o recovery.o replay.o
|
||||
|
||||
# SEE README.arm-unaligned-accesses
|
||||
CFLAGS_super.o := $(PLATFORM_NO_UNALIGNED)
|
||||
|
|
|
@ -65,6 +65,3 @@ obj-y += vsprintf.o
|
|||
obj-$(CONFIG_RANDOM_MACADDR) += rand.o
|
||||
obj-$(CONFIG_BOOTP_RANDOM_DELAY) += rand.o
|
||||
obj-$(CONFIG_CMD_LINK_LOCAL) += rand.o
|
||||
|
||||
# SEE README.arm-unaligned-accesses
|
||||
CFLAGS_bzlib.o := $(PLATFORM_NO_UNALIGNED)
|
||||
|
|
Loading…
Reference in a new issue