crc32: Add crc32 implementation using __builtin_aarch64_crc32b

ARMv8.0 has an optional crc32 instruction for crc32 calculation. The
instruction is mandatory since ARMv8.1. The crc32 calculation is
faster using the dedicated instruction, e.g. a 1.4 GHz iMX8MN gives:

  => time crc32 0x50000000 0x2000000
  time: 0.126 seconds # crc32 instruction
  time: 0.213 seconds # software crc32

Add an implementation using the compiler builtin wrapper for the crc32
instruction and enable it by default, since we don't support any
platforms which do not implement this instruction.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Simon Glass <sjg@chromium.org>
[trini: Make crc32_table guarded by CONFIG_ARM64_CRC32]
Signed-off-by: Tom Rini <trini@konsulko.com>
This commit is contained in:
Marek Vasut 2021-08-30 15:05:23 +02:00 committed by Tom Rini
parent 37479e65a3
commit 270f8710f9
3 changed files with 22 additions and 1 deletions

View file

@ -9,6 +9,16 @@ config ARM64
select PHYS_64BIT select PHYS_64BIT
select SYS_CACHE_SHIFT_6 select SYS_CACHE_SHIFT_6
config ARM64_CRC32
bool "Enable support for CRC32 instruction"
depends on ARM64
default y
help
ARMv8 implements a dedicated crc32 instruction for crc32 calculation.
This is faster than software crc32 calculation. This instruction may
not be present on all ARMv8.0 implementations, but is always present
on ARMv8.1 and newer.
config POSITION_INDEPENDENT config POSITION_INDEPENDENT
bool "Generate position-independent pre-relocation code" bool "Generate position-independent pre-relocation code"
depends on ARM64 || CPU_V7A depends on ARM64 || CPU_V7A

View file

@ -18,7 +18,11 @@ arch-$(CONFIG_CPU_V7A) =$(call cc-option, -march=armv7-a, \
$(call cc-option, -march=armv7)) $(call cc-option, -march=armv7))
arch-$(CONFIG_CPU_V7M) =-march=armv7-m arch-$(CONFIG_CPU_V7M) =-march=armv7-m
arch-$(CONFIG_CPU_V7R) =-march=armv7-r arch-$(CONFIG_CPU_V7R) =-march=armv7-r
ifeq ($(CONFIG_ARM64_CRC32),y)
arch-$(CONFIG_ARM64) =-march=armv8-a+crc
else
arch-$(CONFIG_ARM64) =-march=armv8-a arch-$(CONFIG_ARM64) =-march=armv8-a
endif
# On Tegra systems we must build SPL for the armv4 core on the device # On Tegra systems we must build SPL for the armv4 core on the device
# but otherwise we can use the value in CONFIG_SYS_ARM_ARCH # but otherwise we can use the value in CONFIG_SYS_ARM_ARCH

View file

@ -84,7 +84,7 @@ static void __efi_runtime make_crc_table(void)
} }
crc_table_empty = 0; crc_table_empty = 0;
} }
#else #elif !defined(CONFIG_ARM64_CRC32)
/* ======================================================================== /* ========================================================================
* Table of CRC-32's of all single-byte values (made by make_crc_table) * Table of CRC-32's of all single-byte values (made by make_crc_table)
*/ */
@ -184,6 +184,12 @@ const uint32_t * ZEXPORT get_crc_table()
*/ */
uint32_t __efi_runtime crc32_no_comp(uint32_t crc, const Bytef *buf, uInt len) uint32_t __efi_runtime crc32_no_comp(uint32_t crc, const Bytef *buf, uInt len)
{ {
#ifdef CONFIG_ARM64_CRC32
crc = cpu_to_le32(crc);
while (len--)
crc = __builtin_aarch64_crc32b(crc, *buf++);
return le32_to_cpu(crc);
#else
const uint32_t *tab = crc_table; const uint32_t *tab = crc_table;
const uint32_t *b =(const uint32_t *)buf; const uint32_t *b =(const uint32_t *)buf;
size_t rem_len; size_t rem_len;
@ -221,6 +227,7 @@ uint32_t __efi_runtime crc32_no_comp(uint32_t crc, const Bytef *buf, uInt len)
} }
return le32_to_cpu(crc); return le32_to_cpu(crc);
#endif
} }
#undef DO_CRC #undef DO_CRC