123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230 |
- package crc32
- import "unsafe"
// The declarations below have no Go body, so each one is implemented outside
// this file (presumably in the package's amd64 assembly source — confirm).

// haveSSE41 feeds the useFastIEEE feature gate. Judging by its name it
// reports SSE 4.1 support on the running CPU (not verifiable from this file).
func haveSSE41() bool

// haveSSE42 feeds the sse42 feature gate. Judging by its name it reports
// SSE 4.2 support on the running CPU (not verifiable from this file).
func haveSSE42() bool

// haveCLMUL feeds the useFastIEEE feature gate. Judging by its name it
// reports carry-less multiply support (not verifiable from this file).
func haveCLMUL() bool

// castagnoliSSE42 folds p into crc and returns the updated value. Callers in
// this file pass and receive the value in inverted form and do the final
// inversion themselves.
func castagnoliSSE42(crc uint32, p []byte) uint32

// castagnoliSSE42Triple advances three independent checksums (crcA over a,
// crcB over b, crcC over c) in one call and returns the three results.
// Callers in this file pass rounds as the per-stream byte count divided by
// 24, which suggests each round consumes 24 bytes of every stream — TODO
// confirm against the implementation.
func castagnoliSSE42Triple(
	crcA, crcB, crcC uint32,
	a, b, c []byte,
	rounds uint32,
) (retA, retB, retC uint32)

// ieeeCLMUL folds p into crc and returns the updated value. The only caller
// in this file hands it an inverted crc, inverts the result, and passes a
// buffer whose length is a multiple of 16 and at least 64 bytes.
func ieeeCLMUL(crc uint32, p []byte) uint32
- var sse42 = haveSSE42()
- var useFastIEEE = haveCLMUL() && haveSSE41()
- const castagnoliK1 = 168
- const castagnoliK2 = 1344
- type sse42Table [4]Table
- var castagnoliSSE42TableK1 *sse42Table
- var castagnoliSSE42TableK2 *sse42Table
- func archAvailableCastagnoli() bool {
- return sse42
- }
- func archInitCastagnoli() {
- if !sse42 {
- panic("arch-specific Castagnoli not available")
- }
- castagnoliSSE42TableK1 = new(sse42Table)
- castagnoliSSE42TableK2 = new(sse42Table)
-
-
-
-
-
-
- var tmp [castagnoliK2]byte
- for b := 0; b < 4; b++ {
- for i := 0; i < 256; i++ {
- val := uint32(i) << uint32(b*8)
- castagnoliSSE42TableK1[b][i] = castagnoliSSE42(val, tmp[:castagnoliK1])
- castagnoliSSE42TableK2[b][i] = castagnoliSSE42(val, tmp[:])
- }
- }
- }
- func castagnoliShift(table *sse42Table, crc uint32) uint32 {
- return table[3][crc>>24] ^
- table[2][(crc>>16)&0xFF] ^
- table[1][(crc>>8)&0xFF] ^
- table[0][crc&0xFF]
- }
- func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
- if !sse42 {
- panic("not available")
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- crc = ^crc
-
-
- if len(p) >= castagnoliK1*3 {
- delta := int(uintptr(unsafe.Pointer(&p[0])) & 7)
- if delta != 0 {
- delta = 8 - delta
- crc = castagnoliSSE42(crc, p[:delta])
- p = p[delta:]
- }
- }
-
- for len(p) >= castagnoliK2*3 {
-
- crcA, crcB, crcC := castagnoliSSE42Triple(
- crc, 0, 0,
- p, p[castagnoliK2:], p[castagnoliK2*2:],
- castagnoliK2/24)
-
- crcAB := castagnoliShift(castagnoliSSE42TableK2, crcA) ^ crcB
-
- crc = castagnoliShift(castagnoliSSE42TableK2, crcAB) ^ crcC
- p = p[castagnoliK2*3:]
- }
-
- for len(p) >= castagnoliK1*3 {
-
- crcA, crcB, crcC := castagnoliSSE42Triple(
- crc, 0, 0,
- p, p[castagnoliK1:], p[castagnoliK1*2:],
- castagnoliK1/24)
-
- crcAB := castagnoliShift(castagnoliSSE42TableK1, crcA) ^ crcB
-
- crc = castagnoliShift(castagnoliSSE42TableK1, crcAB) ^ crcC
- p = p[castagnoliK1*3:]
- }
-
- crc = castagnoliSSE42(crc, p)
- return ^crc
- }
- func archAvailableIEEE() bool {
- return useFastIEEE
- }
- var archIeeeTable8 *slicing8Table
- func archInitIEEE() {
- if !useFastIEEE {
- panic("not available")
- }
-
- archIeeeTable8 = slicingMakeTable(IEEE)
- }
- func archUpdateIEEE(crc uint32, p []byte) uint32 {
- if !useFastIEEE {
- panic("not available")
- }
- if len(p) >= 64 {
- left := len(p) & 15
- do := len(p) - left
- crc = ^ieeeCLMUL(^crc, p[:do])
- p = p[do:]
- }
- if len(p) == 0 {
- return crc
- }
- return slicingUpdate(crc, archIeeeTable8, p)
- }
|