Skip to content

Commit ac4c2a3

Browse files
Joakim Tjernlund, torvalds
Joakim Tjernlund
authored and committed
zlib: optimize inffast when copying direct from output
JFFS2 uses a lower compression ratio and inflate always ends up in the "copy direct from output" case. This patch tries to optimize the direct copy procedure. It uses get_unaligned(), but only in one place. The copy loop just above this one could also use this optimization, but I haven't done so as I have not tested whether it is a win there too. On my MPC8321 this is about 17% faster on my JFFS2 root FS than the original. [[email protected]: coding-style fixes] Signed-off-by: Joakim Tjernlund <[email protected]> Cc: Roel Kluin <[email protected]> Cc: Richard Purdie <[email protected]> Cc: David Woodhouse <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 129182e commit ac4c2a3

File tree

2 files changed

+47
-12
lines changed

2 files changed

+47
-12
lines changed

arch/powerpc/boot/Makefile

+3-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
all: $(obj)/zImage
2121

2222
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
23-
-fno-strict-aliasing -Os -msoft-float -pipe \
23+
-fno-strict-aliasing -Os -msoft-float -pipe -D__KERNEL__\
2424
-fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
2525
-isystem $(shell $(CROSS32CC) -print-file-name=include)
2626
BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
@@ -34,6 +34,8 @@ BOOTCFLAGS += -fno-stack-protector
3434
endif
3535

3636
BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
37+
BOOTCFLAGS += -include include/linux/autoconf.h -Iarch/powerpc/include
38+
BOOTCFLAGS += -Iinclude
3739

3840
DTS_FLAGS ?= -p 1024
3941

lib/zlib_inflate/inffast.c

+44-11
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
*/
55

66
#include <linux/zutil.h>
7+
#include <asm/unaligned.h>
8+
#include <asm/byteorder.h>
79
#include "inftrees.h"
810
#include "inflate.h"
911
#include "inffast.h"
@@ -24,9 +26,11 @@
2426
#ifdef POSTINC
2527
# define OFF 0
2628
# define PUP(a) *(a)++
29+
# define UP_UNALIGNED(a) get_unaligned((a)++)
2730
#else
2831
# define OFF 1
2932
# define PUP(a) *++(a)
33+
# define UP_UNALIGNED(a) get_unaligned(++(a))
3034
#endif
3135

3236
/*
@@ -239,18 +243,47 @@ void inflate_fast(z_streamp strm, unsigned start)
239243
}
240244
}
241245
else {
246+
unsigned short *sout;
247+
unsigned long loops;
248+
242249
from = out - dist; /* copy direct from output */
243-
do { /* minimum length is three */
244-
PUP(out) = PUP(from);
245-
PUP(out) = PUP(from);
246-
PUP(out) = PUP(from);
247-
len -= 3;
248-
} while (len > 2);
249-
if (len) {
250-
PUP(out) = PUP(from);
251-
if (len > 1)
252-
PUP(out) = PUP(from);
253-
}
250+
/* minimum length is three */
251+
/* Align out addr */
252+
if (!((long)(out - 1 + OFF) & 1)) {
253+
PUP(out) = PUP(from);
254+
len--;
255+
}
256+
sout = (unsigned short *)(out - OFF);
257+
if (dist > 2) {
258+
unsigned short *sfrom;
259+
260+
sfrom = (unsigned short *)(from - OFF);
261+
loops = len >> 1;
262+
do
263+
PUP(sout) = UP_UNALIGNED(sfrom);
264+
while (--loops);
265+
out = (unsigned char *)sout + OFF;
266+
from = (unsigned char *)sfrom + OFF;
267+
} else { /* dist == 1 or dist == 2 */
268+
unsigned short pat16;
269+
270+
pat16 = *(sout-2+2*OFF);
271+
if (dist == 1)
272+
#if defined(__BIG_ENDIAN)
273+
pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8);
274+
#elif defined(__LITTLE_ENDIAN)
275+
pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8);
276+
#else
277+
#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
278+
#endif
279+
loops = len >> 1;
280+
do
281+
PUP(sout) = pat16;
282+
while (--loops);
283+
out = (unsigned char *)sout + OFF;
284+
}
285+
if (len & 1)
286+
PUP(out) = PUP(from);
254287
}
255288
}
256289
else if ((op & 64) == 0) { /* 2nd level distance code */

0 commit comments

Comments
 (0)