powerpc/64: Use optimized checksum routines on little-endian
Currently we have optimized hand-coded assembly checksum routines for
big-endian 64-bit systems, but for little-endian we use the generic C
routines. This modifies the optimized routines to work for
little-endian. With this, we no longer need to enable
CONFIG_GENERIC_CSUM. This also fixes a couple of comments in
checksum_64.S so they accurately reflect what the associated instruction
does.

Signed-off-by: Paul Mackerras <[email protected]>
[mpe: Use the more common __BIG_ENDIAN__]
Signed-off-by: Michael Ellerman <[email protected]>
paulusmack authored and mpe committed Jan 25, 2017
1 parent b492f7e commit d4fde56
Showing 4 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion arch/powerpc/Kconfig
@@ -167,7 +167,7 @@ config PPC
 	select HAVE_CC_STACKPROTECTOR
 
 config GENERIC_CSUM
-	def_bool CPU_LITTLE_ENDIAN
+	def_bool n
 
 config EARLY_PRINTK
 	bool
4 changes: 4 additions & 0 deletions arch/powerpc/include/asm/checksum.h
@@ -70,7 +70,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
 	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
 	return (__force __wsum) from64to32(s);
 #else
 	__asm__("\n\
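
The little-endian branch works because the checksum code sums halfwords exactly as they sit in memory, so on little-endian the folded result is the byte swap of the big-endian result, and since 0x10000 is congruent to 1 modulo 0xffff, a value shifted left by 8 contributes the same as its byte-swapped 16-bit form. Below is a minimal user-space sketch (not kernel code; the helper names, the use of __builtin_bswap32/__builtin_bswap16 to simulate little-endian loads of network-order words, and the test values are illustrative assumptions) that checks the new s += (proto + len) << 8 term against a plain RFC 1071 sum of the pseudo-header:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Fold an accumulator down to 16 bits with end-around carry. */
static uint16_t fold16(uint64_t s)
{
	while (s >> 16)
		s = (s & 0xffff) + (s >> 16);
	return (uint16_t)s;
}

/* Reference: RFC 1071 sum of the pseudo-header as big-endian halfwords:
 * saddr, daddr, the halfword { 0x00, proto } and the halfword len. */
static uint16_t pseudo_sum_be(uint32_t saddr, uint32_t daddr,
			      uint16_t len, uint8_t proto)
{
	uint64_t s = 0;

	s += (saddr >> 16) + (saddr & 0xffff);
	s += (daddr >> 16) + (daddr & 0xffff);
	s += proto;
	s += len;
	return fold16(s);
}

/* Little-endian flavour, mirroring the new csum_tcpudp_nofold() arithmetic:
 * the addresses as a little-endian CPU would load their network-order
 * bytes, plus (proto + len) << 8. */
static uint16_t pseudo_sum_le(uint32_t saddr, uint32_t daddr,
			      uint16_t len, uint8_t proto)
{
	uint64_t s = 0;

	s += __builtin_bswap32(saddr);		/* simulate the LE load */
	s += __builtin_bswap32(daddr);
	s += (uint32_t)(proto + len) << 8;
	return fold16(s);
}

int main(void)
{
	uint32_t saddr = 0xc0a80001, daddr = 0x0a000001;	/* arbitrary */
	uint16_t len = 0x1234;
	uint8_t proto = 6;					/* TCP */
	uint16_t be = pseudo_sum_be(saddr, daddr, len, proto);
	uint16_t le = pseudo_sum_le(saddr, daddr, len, proto);

	/* The LE sum is the byte swap of the BE sum, so both leave the
	 * same two bytes in memory once stored. */
	assert(__builtin_bswap16(le) == be);
	printf("be=%04x le=%04x\n", be, le);
	return 0;
}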
2 changes: 0 additions & 2 deletions arch/powerpc/lib/Makefile
@@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 obj64-$(CONFIG_SMP) += locks.o
 obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
 
-ifeq ($(CONFIG_GENERIC_CSUM),)
 obj-y += checksum_$(BITS).o checksum_wrappers.o
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
 
12 changes: 10 additions & 2 deletions arch/powerpc/lib/checksum_64.S
@@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
 	 * work to calculate the correct checksum, we ignore that case
 	 * and take the potential slowdown of unaligned loads.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq .Lcsum_aligned
 
 	li r7,4
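
For context on the comment fix: rldicl rA,rS,SH,MB rotates rS left by SH bits and keeps only the low 64-MB bits (bits MB through 63 in IBM numbering), so with SH = 64-1 and MB = 64-2 the result really is (r3 >> 1) & 0x3, i.e. bits 1 and 2 of the buffer address. A small C sketch of the instruction's effect (the rldicl() helper is an illustrative emulation, not anything from the kernel tree):

#include <assert.h>
#include <stdint.h>

/* Emulate rldicl rA,rS,SH,MB: rotate rS left by SH, then keep only the
 * low 64-MB bits (bits MB..63 in IBM bit numbering). */
static uint64_t rldicl(uint64_t rs, unsigned int sh, unsigned int mb)
{
	uint64_t rot = sh ? (rs << sh) | (rs >> (64 - sh)) : rs;

	return rot & (~0ULL >> mb);
}

int main(void)
{
	uint64_t r3;

	/* The instruction in question: rldicl. r6,r3,64-1,64-2 */
	for (r3 = 0; r3 < 256; r3++)
		assert(rldicl(r3, 64 - 1, 64 - 2) == ((r3 >> 1) & 0x3));
	return 0;
}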
@@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
 	beq .Lcsum_finish
 
 	lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi r9,r6,8		/* Pad the byte out to 16 bits */
 	adde r0,r0,r9
+#else
+	adde r0,r0,r6
+#endif
 
 .Lcsum_finish:
 	addze r0,r0		/* add in final carry */
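
The little-endian branch adds the final odd byte unshifted because the routine checksums the buffer as halfwords in memory order: the leftover byte is the first byte of a would-be halfword, which is the low-order byte on a little-endian load but the high-order byte on big-endian (hence the existing sldi by 8). A user-space sketch of that equivalence (helper names, __builtin_bswap16 and the sample buffer are illustrative assumptions):

#include <assert.h>
#include <stdint.h>

/* RFC 1071 reference: big-endian halfwords, an odd trailing byte is the
 * high-order byte of a zero-padded final halfword. */
static uint16_t ref_sum(const uint8_t *p, int len)
{
	uint64_t s = 0;
	int i;

	for (i = 0; i + 1 < len; i += 2)
		s += ((uint16_t)p[i] << 8) | p[i + 1];
	if (len & 1)
		s += (uint16_t)p[len - 1] << 8;
	while (s >> 16)
		s = (s & 0xffff) + (s >> 16);
	return (uint16_t)s;
}

/* What the little-endian path computes: halfwords as loaded by an LE CPU,
 * trailing byte added unshifted (the "adde r0,r0,r6" case). */
static uint16_t le_sum(const uint8_t *p, int len)
{
	uint64_t s = 0;
	int i;

	for (i = 0; i + 1 < len; i += 2)
		s += ((uint16_t)p[i + 1] << 8) | p[i];
	if (len & 1)
		s += p[len - 1];
	while (s >> 16)
		s = (s & 0xffff) + (s >> 16);
	return (uint16_t)s;
}

int main(void)
{
	uint8_t buf[7] = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde };

	/* Same bytes end up in memory once each result is stored in its
	 * own byte order. */
	assert(__builtin_bswap16(le_sum(buf, 7)) == ref_sum(buf, 7));
	return 0;
}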
@@ -224,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic)
 	 * If the source and destination are relatively unaligned we only
 	 * align the source. This keeps things simple.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq .Lcopy_aligned
 
 	li r9,4
@@ -386,8 +390,12 @@ dstnr; sth r6,0(r4)
 	beq .Lcopy_finish
 
 srcnr;	lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi r9,r6,8		/* Pad the byte out to 16 bits */
 	adde r0,r0,r9
+#else
+	adde r0,r0,r6
+#endif
 dstnr;	stb r6,0(r4)
 
 .Lcopy_finish: