Skip to content

Commit

Permalink
perf(tea): unrolling encode
Browse files Browse the repository at this point in the history
name         old time/op    new time/op    delta
TEAen/16-8      237ns ± 0%     227ns ± 1%  -4.03%  (p=0.008 n=5+5)
TEAen/256-8    1.69µs ± 0%    1.60µs ± 1%  -4.83%  (p=0.008 n=5+5)
TEAen/4K-8     25.0µs ± 1%    23.8µs ± 2%  -4.73%  (p=0.008 n=5+5)
TEAen/32K-8     199µs ± 1%     188µs ± 1%  -5.30%  (p=0.008 n=5+5)
TEAde/16-8      196ns ± 1%     194ns ± 1%  -0.89%  (p=0.016 n=5+5)
TEAde/256-8    1.56µs ± 0%    1.54µs ± 0%  -1.50%  (p=0.008 n=5+5)
TEAde/4K-8     23.5µs ± 0%    23.2µs ± 0%  -1.46%  (p=0.008 n=5+5)
TEAde/32K-8     187µs ± 1%     184µs ± 1%  -1.50%  (p=0.008 n=5+5)

name         old speed      new speed      delta
TEAen/16-8   67.6MB/s ± 1%  70.5MB/s ± 1%  +4.20%  (p=0.008 n=5+5)
TEAen/256-8   152MB/s ± 0%   160MB/s ± 1%  +5.07%  (p=0.008 n=5+5)
TEAen/4K-8    164MB/s ± 1%   172MB/s ± 2%  +4.98%  (p=0.008 n=5+5)
TEAen/32K-8   165MB/s ± 1%   174MB/s ± 1%  +5.60%  (p=0.008 n=5+5)
TEAde/16-8    163MB/s ± 1%   165MB/s ± 1%  +0.90%  (p=0.016 n=5+5)
TEAde/256-8   174MB/s ± 0%   177MB/s ± 0%  +1.51%  (p=0.008 n=5+5)
TEAde/4K-8    175MB/s ± 0%   177MB/s ± 0%  +1.48%  (p=0.008 n=5+5)
TEAde/32K-8   175MB/s ± 1%   178MB/s ± 1%  +1.52%  (p=0.008 n=5+5)
  • Loading branch information
wdvxdr1123 committed Nov 17, 2021
1 parent 95364f1 commit 574c4e5
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 55 deletions.
122 changes: 67 additions & 55 deletions binary/tea.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,32 +48,44 @@ func (t TEA) Decrypt(data []byte) []byte {
return dst[dst[0]&7+3 : len(data)-7]
}

// sumTable holds the per-round additive constants used by the looped form of
// encode. Entry i equals (i+1)*0x9e3779b9 truncated to 32 bits, so the table
// lists the running sum after each of the 16 rounds.
var sumTable = [16]uint32{
	0x9e3779b9, 0x3c6ef372, 0xdaa66d2b, 0x78dde6e4,
	0x1715609d, 0xb54cda56, 0x5384540f, 0xf1bbcdc8,
	0x8ff34781, 0x2e2ac13a, 0xcc623af3, 0x6a99b4ac,
	0x08d12e65, 0xa708a81e, 0x454021d7, 0xe3779b90,
}

// encode transforms the 64-bit block n with the four key words t[0]..t[3] and
// returns the result. The high 32 bits of n become v0 and the low 32 bits
// become v1; each round updates v0 from v1 and then v1 from the new v0, and
// the two halves are packed back into a uint64 at the end.
//
// NOTE(review): this listing appears to come from a rendered diff in which the
// loop form and the unrolled form of the same 16 rounds both appear.
// Presumably only one of them is live in the real file - confirm against the
// repository before relying on this text as compilable source.
//
//go:nosplit
func (t *TEA) encode(n uint64) uint64 {
v0, v1 := uint32(n>>32), uint32(n)
// Loop form: 16 rounds, taking the round constant from sumTable[i].
for i := 0; i < 0x10; i++ {
v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
}
// Copy the key words into locals once so the unrolled rounds below use plain
// variables instead of indexing through t each time.
t0, t1, t2, t3 := t[0], t[1], t[2], t[3]

// Unrolled form: the same 16 round pairs with the round constants written as
// literals; the literals match the sumTable entries in order
// (0x9e3779b9, 0x3c6ef372, ..., 0xe3779b90).
v0 += (v1 + 0x9e3779b9) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x9e3779b9) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x3c6ef372) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x3c6ef372) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x78dde6e4) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x78dde6e4) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x1715609d) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x1715609d) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0xb54cda56) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0xb54cda56) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x5384540f) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x5384540f) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x8ff34781) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x8ff34781) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0xcc623af3) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0xcc623af3) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x08d12e65) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x08d12e65) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0xa708a81e) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0xa708a81e) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0x454021d7) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0x454021d7) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 += (v1 + 0xe3779b90) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 += (v0 + 0xe3779b90) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)

// Repack: v0 is the high word, v1 the low word.
return uint64(v0)<<32 | uint64(v1)
}

Expand All @@ -83,38 +95,38 @@ func (t *TEA) decode(n uint64) uint64 {
v0, v1 := uint32(n>>32), uint32(n)
t0, t1, t2, t3 := t[0], t[1], t[2], t[3]

v1 -= ((v0 << 4) + t2) ^ (v0 + 0xe3779b90) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0xe3779b90) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x454021d7) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x454021d7) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0xa708a81e) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0xa708a81e) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x8d12e65) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x8d12e65) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x6a99b4ac) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x6a99b4ac) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0xcc623af3) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0xcc623af3) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x2e2ac13a) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x2e2ac13a) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x8ff34781) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x8ff34781) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0xf1bbcdc8) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0xf1bbcdc8) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x5384540f) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x5384540f) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0xb54cda56) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0xb54cda56) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x1715609d) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x1715609d) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x78dde6e4) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x78dde6e4) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0xdaa66d2b) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0xdaa66d2b) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x3c6ef372) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x3c6ef372) ^ ((v1 >> 5) + t1)
v1 -= ((v0 << 4) + t2) ^ (v0 + 0x9e3779b9) ^ ((v0 >> 5) + t3)
v0 -= ((v1 << 4) + t0) ^ (v1 + 0x9e3779b9) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)

return uint64(v0)<<32 | uint64(v1)
}
Expand Down
12 changes: 12 additions & 0 deletions binary/tea_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,15 @@ func BenchmarkTEAde(b *testing.B) {
benchDecrypt(b, data)
})
}

// teaEncodeSink receives the last result computed by BenchmarkTEA_encode so
// the measured call has an observable effect and cannot be optimized away.
var teaEncodeSink uint64

// BenchmarkTEA_encode measures a single-block testTEA.encode call on a fixed
// input, repeated b.N times.
func BenchmarkTEA_encode(b *testing.B) {
	var r uint64
	for i := 0; i < b.N; i++ {
		r = testTEA.encode(114514)
	}
	// Publish the result outside the loop to keep the store off the hot path.
	teaEncodeSink = r
}

// teaDecodeSink receives the last result computed by BenchmarkTEA_decode so
// the measured call has an observable effect and cannot be optimized away.
var teaDecodeSink uint64

// BenchmarkTEA_decode measures a single-block testTEA.decode call on a fixed
// input, repeated b.N times.
func BenchmarkTEA_decode(b *testing.B) {
	var r uint64
	for i := 0; i < b.N; i++ {
		r = testTEA.decode(114514)
	}
	// Publish the result outside the loop to keep the store off the hot path.
	teaDecodeSink = r
}

0 comments on commit 574c4e5

Please sign in to comment.