mirror of
https://github.com/AsahiLinux/u-boot
synced 2024-11-10 15:14:43 +00:00
lzma: Fix decompression speed regression
Uncompressing a 1.7 Mbytes FIT image on U-Boot 2023.04 takes approx 7s on a powerpc 8xx. The same on U-Boot 2023.07-rc6 takes approx 28s unless the watchdog is disabled. During that decompression, LzmaDec_DecodeReal() calls schedule() 1.6 million times, that is every 4µs on average. In the past it used to be a call to WATCHDOG_RESET(), which was just calling hw_watchdog_reset(). But the combination of commit 29caf9305b
("cyclic: Use schedule() instead of WATCHDOG_RESET()") and commit26e8ebcd7c
("watchdog: mpc8xxx: Make it generic") results in an heavier processing. However, there is absolutely no point in calling schedule() that often. By moving and keeping only one call to schedule() in the main loop the number of calls is reduced to 1.2 million which is still too much. So add logic to only call schedule every 1024 times. That leads to a call to schedule approx every 6ms which is still far enough to entertain the watchdog which has a 1s timeout on powerpc 8xx. powerpc 8xx being one of the slowest targets we have today in U-boot, and most other watchdogs having a timeout of one minutes instead of one second like the 8xx, this fix should not have negative impact on other targets. Fixes:29caf9305b
("cyclic: Use schedule() instead of WATCHDOG_RESET()") Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Simon Glass <sjg@chromium.org>
This commit is contained in:
parent
820801eacc
commit
ad47974707
1 changed files with 4 additions and 14 deletions
|
@ -152,8 +152,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
const Byte *buf = p->buf;
|
||||
UInt32 range = p->range;
|
||||
UInt32 code = p->code;
|
||||
|
||||
schedule();
|
||||
unsigned int loop = 0;
|
||||
|
||||
do
|
||||
{
|
||||
|
@ -162,6 +161,9 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
unsigned ttt;
|
||||
unsigned posState = processedPos & pbMask;
|
||||
|
||||
if (!(loop++ & 1023))
|
||||
schedule();
|
||||
|
||||
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
|
@ -177,8 +179,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
state -= (state < 4) ? state : 3;
|
||||
symbol = 1;
|
||||
|
||||
schedule();
|
||||
|
||||
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
|
||||
}
|
||||
else
|
||||
|
@ -188,8 +188,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
state -= (state < 10) ? 3 : 6;
|
||||
symbol = 1;
|
||||
|
||||
schedule();
|
||||
|
||||
do
|
||||
{
|
||||
unsigned bit;
|
||||
|
@ -321,8 +319,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
UInt32 mask = 1;
|
||||
unsigned i = 1;
|
||||
|
||||
schedule();
|
||||
|
||||
do
|
||||
{
|
||||
GET_BIT2(prob + i, i, ; , distance |= mask);
|
||||
|
@ -335,8 +331,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
{
|
||||
numDirectBits -= kNumAlignBits;
|
||||
|
||||
schedule();
|
||||
|
||||
do
|
||||
{
|
||||
NORMALIZE
|
||||
|
@ -409,8 +403,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
const Byte *lim = dest + curLen;
|
||||
dicPos += curLen;
|
||||
|
||||
schedule();
|
||||
|
||||
do
|
||||
*(dest) = (Byte)*(dest + src);
|
||||
while (++dest != lim);
|
||||
|
@ -418,8 +410,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte
|
|||
else
|
||||
{
|
||||
|
||||
schedule();
|
||||
|
||||
do
|
||||
{
|
||||
dic[dicPos++] = dic[pos];
|
||||
|
|
Loading…
Reference in a new issue