-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
hash/crc32: add AMD64 optimized IEEE CRC calculation
IEEE is the most commonly used CRC-32 polynomial, used by zip, gzip and others.

Based on http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf

benchmark                       old ns/op     new ns/op     delta
BenchmarkIEEECrc1KB-8           3193          352           -88.98%
BenchmarkIEEECrc4KB-8           5025          1307          -73.99%
BenchmarkCastagnoliCrc1KB-8     126           126           +0.00%

benchmark                       old MB/s     new MB/s     speedup
BenchmarkIEEECrc1KB-8           320.68       2901.92      9.05x
BenchmarkIEEECrc4KB-8           815.08       3131.80      3.84x
BenchmarkCastagnoliCrc1KB-8     8100.80      8109.78      1.00x

Change-Id: I99c9a48365f631827f516e44f97e86155f03cb90
Reviewed-on: https://go-review.googlesource.com/14080
Reviewed-by: Keith Randall <[email protected]>
- Loading branch information
Showing
5 changed files
with
251 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// Copyright 2011 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package crc32 | ||
|
||
// This file contains the code to call the SSE 4.2 version of the Castagnoli | ||
// CRC and the PCLMULQDQ (CLMUL) version of the IEEE CRC. | ||
|
||
// haveSSE41/haveSSE42/haveCLMUL are defined in crc_amd64.s and use | ||
// CPUID to test for SSE 4.1, 4.2 and CLMUL support. | ||
// | ||
// They are declared here without bodies; these declarations only give the | ||
// assembly routines a Go signature. Within this file each one is called | ||
// once, to initialize the package-level sse42 and useFastIEEE flags. | ||
func haveSSE41() bool | ||
func haveSSE42() bool | ||
func haveCLMUL() bool | ||
|
||
// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32 | ||
// instruction. | ||
// | ||
// It takes the checksum so far (crc) and the data to fold in (p) and returns | ||
// the updated checksum. updateCastagnoli hands crc to it unmodified and | ||
// returns its result unmodified, and only calls it when sse42 is true. | ||
func castagnoliSSE42(crc uint32, p []byte) uint32 | ||
|
||
// ieeeCLMUL is defined in crc_amd64.s and uses the PCLMULQDQ | ||
// instruction as well as SSE 4.1. | ||
// | ||
// updateIEEE is its only caller in this file. That caller complements crc | ||
// before the call and complements the result afterwards, and it only passes | ||
// slices that are at least 64 bytes long and whose length is a multiple of 16. | ||
// NOTE(review): presumably the assembly relies on those length constraints; | ||
// confirm against crc_amd64.s before calling it from anywhere else. | ||
func ieeeCLMUL(crc uint32, p []byte) uint32 | ||
|
||
var sse42 = haveSSE42() | ||
var useFastIEEE = haveCLMUL() && haveSSE41() | ||
|
||
func updateCastagnoli(crc uint32, p []byte) uint32 { | ||
if sse42 { | ||
return castagnoliSSE42(crc, p) | ||
} | ||
return update(crc, castagnoliTable, p) | ||
} | ||
|
||
func updateIEEE(crc uint32, p []byte) uint32 { | ||
if useFastIEEE && len(p) >= 64 { | ||
left := len(p) & 15 | ||
do := len(p) - left | ||
crc = ^ieeeCLMUL(^crc, p[:do]) | ||
if left > 0 { | ||
crc = update(crc, IEEETable, p[do:]) | ||
} | ||
return crc | ||
} | ||
|
||
// only use slicing-by-8 when input is >= 4KB | ||
if len(p) >= 4096 { | ||
iEEETable8Once.Do(func() { | ||
iEEETable8 = makeTable8(IEEE) | ||
}) | ||
return updateSlicingBy8(crc, iEEETable8, p) | ||
} | ||
|
||
return update(crc, IEEETable, p) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters