src/runtime: add per-map hash seeds

Этот коммит содержится в:
Damian Gryski 2022-03-09 13:47:27 -08:00 коммит произвёл Ron Evans
родитель 39805bca45
коммит e45ff9c0e8
2 изменённых файла: 61 добавление и 44 удаления

Просмотреть файл

@@ -26,16 +26,16 @@ func xorshift32(x uint32) uint32 {
// This function is used by hash/maphash. // This function is used by hash/maphash.
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr { func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
if unsafe.Sizeof(uintptr(0)) > 4 { if unsafe.Sizeof(uintptr(0)) > 4 {
return seed ^ uintptr(hash64(p, s)) return uintptr(hash64(p, s, seed))
} }
return seed ^ uintptr(hash32(p, s)) return uintptr(hash32(p, s, seed))
} }
// Get FNV-1a hash of the given memory buffer. // Get FNV-1a hash of the given memory buffer.
// //
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
func hash32(ptr unsafe.Pointer, n uintptr) uint32 { func hash32(ptr unsafe.Pointer, n uintptr, seed uintptr) uint32 {
var result uint32 = 2166136261 // FNV offset basis var result uint32 = 2166136261 ^ uint32(seed) // FNV offset basis
for i := uintptr(0); i < n; i++ { for i := uintptr(0); i < n; i++ {
c := *(*uint8)(unsafe.Pointer(uintptr(ptr) + i)) c := *(*uint8)(unsafe.Pointer(uintptr(ptr) + i))
result ^= uint32(c) // XOR with byte result ^= uint32(c) // XOR with byte
@@ -45,8 +45,8 @@ func hash32(ptr unsafe.Pointer, n uintptr) uint32 {
} }
// Also a FNV-1a hash. // Also a FNV-1a hash.
func hash64(ptr unsafe.Pointer, n uintptr) uint64 { func hash64(ptr unsafe.Pointer, n uintptr, seed uintptr) uint64 {
var result uint64 = 14695981039346656037 // FNV offset basis var result uint64 = 14695981039346656037 ^ uint64(seed) // FNV offset basis
for i := uintptr(0); i < n; i++ { for i := uintptr(0); i < n; i++ {
c := *(*uint8)(unsafe.Pointer(uintptr(ptr) + i)) c := *(*uint8)(unsafe.Pointer(uintptr(ptr) + i))
result ^= uint64(c) // XOR with byte result ^= uint64(c) // XOR with byte

Просмотреть файл

@@ -13,12 +13,13 @@ import (
// The underlying hashmap structure for Go. // The underlying hashmap structure for Go.
type hashmap struct { type hashmap struct {
buckets unsafe.Pointer // pointer to array of buckets buckets unsafe.Pointer // pointer to array of buckets
seed uintptr
count uintptr count uintptr
keySize uint8 // maybe this can store the key type as well? E.g. keysize == 5 means string? keySize uint8 // maybe this can store the key type as well? E.g. keysize == 5 means string?
valueSize uint8 valueSize uint8
bucketBits uint8 bucketBits uint8
keyEqual func(x, y unsafe.Pointer, n uintptr) bool keyEqual func(x, y unsafe.Pointer, n uintptr) bool
keyHash func(key unsafe.Pointer, size uintptr) uint32 keyHash func(key unsafe.Pointer, size, seed uintptr) uint32
} }
type hashmapAlgorithm uint8 type hashmapAlgorithm uint8
@@ -74,6 +75,7 @@ func hashmapMake(keySize, valueSize uint8, sizeHint uintptr, alg uint8) *hashmap
return &hashmap{ return &hashmap{
buckets: buckets, buckets: buckets,
seed: uintptr(fastrand()),
keySize: keySize, keySize: keySize,
valueSize: valueSize, valueSize: valueSize,
bucketBits: bucketBits, bucketBits: bucketBits,
@@ -96,7 +98,7 @@ func hashmapKeyEqualAlg(alg hashmapAlgorithm) func(x, y unsafe.Pointer, n uintpt
} }
} }
func hashmapKeyHashAlg(alg hashmapAlgorithm) func(key unsafe.Pointer, n uintptr) uint32 { func hashmapKeyHashAlg(alg hashmapAlgorithm) func(key unsafe.Pointer, n, seed uintptr) uint32 {
switch alg { switch alg {
case hashmapAlgorithmBinary: case hashmapAlgorithmBinary:
return hash32 return hash32
@@ -148,12 +150,14 @@ func hashmapLenUnsafePointer(p unsafe.Pointer) int {
// Set a specified key to a given value. Grow the map if necessary. // Set a specified key to a given value. Grow the map if necessary.
//go:nobounds //go:nobounds
func hashmapSet(m *hashmap, key unsafe.Pointer, value unsafe.Pointer, hash uint32) { func hashmapSet(m *hashmap, key unsafe.Pointer, value unsafe.Pointer, hash uint32) {
tophash := hashmapTopHash(hash)
if hashmapShouldGrow(m) { if hashmapShouldGrow(m) {
hashmapGrow(m) hashmapGrow(m)
// seed changed when we grew; rehash key with new seed
hash = m.keyHash(key, uintptr(m.keySize), m.seed)
} }
tophash := hashmapTopHash(hash)
numBuckets := uintptr(1) << m.bucketBits numBuckets := uintptr(1) << m.bucketBits
bucketNumber := (uintptr(hash) & (numBuckets - 1)) bucketNumber := (uintptr(hash) & (numBuckets - 1))
bucketSize := unsafe.Sizeof(hashmapBucket{}) + uintptr(m.keySize)*8 + uintptr(m.valueSize)*8 bucketSize := unsafe.Sizeof(hashmapBucket{}) + uintptr(m.keySize)*8 + uintptr(m.valueSize)*8
@@ -221,10 +225,10 @@ func hashmapInsertIntoNewBucket(m *hashmap, key, value unsafe.Pointer, tophash u
} }
func hashmapGrow(m *hashmap) { func hashmapGrow(m *hashmap) {
// clone map as empty // clone map as empty
n := *m n := *m
n.count = 0 n.count = 0
n.seed = uintptr(fastrand())
// allocate our new buckets twice as big // allocate our new buckets twice as big
n.bucketBits = m.bucketBits + 1 n.bucketBits = m.bucketBits + 1
@@ -239,7 +243,7 @@ func hashmapGrow(m *hashmap) {
var value = alloc(uintptr(m.valueSize), nil) var value = alloc(uintptr(m.valueSize), nil)
for hashmapNext(m, &it, key, value) { for hashmapNext(m, &it, key, value) {
h := m.keyHash(key, uintptr(m.keySize)) h := n.keyHash(key, uintptr(n.keySize), n.seed)
hashmapSet(&n, key, value, h) hashmapSet(&n, key, value, h)
} }
@@ -386,7 +390,7 @@ func hashmapNext(m *hashmap, it *hashmapIterator, key, value unsafe.Pointer) boo
// Our view of the buckets doesn't match the parent map. // Our view of the buckets doesn't match the parent map.
// Look up the key in the new buckets and return that value if it exists // Look up the key in the new buckets and return that value if it exists
hash := m.keyHash(key, uintptr(m.keySize)) hash := m.keyHash(key, uintptr(m.keySize), m.seed)
ok := hashmapGet(m, key, value, uintptr(m.valueSize), hash) ok := hashmapGet(m, key, value, uintptr(m.valueSize), hash)
if !ok { if !ok {
// doesn't exist in parent map; try next key // doesn't exist in parent map; try next key
@@ -401,10 +405,9 @@ func hashmapNext(m *hashmap, it *hashmapIterator, key, value unsafe.Pointer) boo
} }
// Hashmap with plain binary data keys (not containing strings etc.). // Hashmap with plain binary data keys (not containing strings etc.).
func hashmapBinarySet(m *hashmap, key, value unsafe.Pointer) { func hashmapBinarySet(m *hashmap, key, value unsafe.Pointer) {
// TODO: detect nil map here and throw a better panic message? // TODO: detect nil map here and throw a better panic message?
hash := hash32(key, uintptr(m.keySize)) hash := hash32(key, uintptr(m.keySize), m.seed)
hashmapSet(m, key, value, hash) hashmapSet(m, key, value, hash)
} }
@@ -413,7 +416,7 @@ func hashmapBinaryGet(m *hashmap, key, value unsafe.Pointer, valueSize uintptr)
memzero(value, uintptr(valueSize)) memzero(value, uintptr(valueSize))
return false return false
} }
hash := hash32(key, uintptr(m.keySize)) hash := hash32(key, uintptr(m.keySize), m.seed)
return hashmapGet(m, key, value, valueSize, hash) return hashmapGet(m, key, value, valueSize, hash)
} }
@@ -421,7 +424,7 @@ func hashmapBinaryDelete(m *hashmap, key unsafe.Pointer) {
if m == nil { if m == nil {
return return
} }
hash := hash32(key, uintptr(m.keySize)) hash := hash32(key, uintptr(m.keySize), m.seed)
hashmapDelete(m, key, hash) hashmapDelete(m, key, hash)
} }
@@ -431,28 +434,35 @@ func hashmapStringEqual(x, y unsafe.Pointer, n uintptr) bool {
return *(*string)(x) == *(*string)(y) return *(*string)(x) == *(*string)(y)
} }
func hashmapStringHash(s string) uint32 { func hashmapStringHash(s string, seed uintptr) uint32 {
_s := (*_string)(unsafe.Pointer(&s)) _s := (*_string)(unsafe.Pointer(&s))
return hash32(unsafe.Pointer(_s.ptr), uintptr(_s.length)) return hash32(unsafe.Pointer(_s.ptr), uintptr(_s.length), seed)
} }
func hashmapStringPtrHash(sptr unsafe.Pointer, size uintptr) uint32 { func hashmapStringPtrHash(sptr unsafe.Pointer, size uintptr, seed uintptr) uint32 {
_s := *(*_string)(sptr) _s := *(*_string)(sptr)
return hash32(unsafe.Pointer(_s.ptr), uintptr(_s.length)) return hash32(unsafe.Pointer(_s.ptr), uintptr(_s.length), seed)
} }
func hashmapStringSet(m *hashmap, key string, value unsafe.Pointer) { func hashmapStringSet(m *hashmap, key string, value unsafe.Pointer) {
hash := hashmapStringHash(key) hash := hashmapStringHash(key, m.seed)
hashmapSet(m, unsafe.Pointer(&key), value, hash) hashmapSet(m, unsafe.Pointer(&key), value, hash)
} }
func hashmapStringGet(m *hashmap, key string, value unsafe.Pointer, valueSize uintptr) bool { func hashmapStringGet(m *hashmap, key string, value unsafe.Pointer, valueSize uintptr) bool {
hash := hashmapStringHash(key) if m == nil {
memzero(value, uintptr(valueSize))
return false
}
hash := hashmapStringHash(key, m.seed)
return hashmapGet(m, unsafe.Pointer(&key), value, valueSize, hash) return hashmapGet(m, unsafe.Pointer(&key), value, valueSize, hash)
} }
func hashmapStringDelete(m *hashmap, key string) { func hashmapStringDelete(m *hashmap, key string) {
hash := hashmapStringHash(key) if m == nil {
return
}
hash := hashmapStringHash(key, m.seed)
hashmapDelete(m, unsafe.Pointer(&key), hash) hashmapDelete(m, unsafe.Pointer(&key), hash)
} }
@@ -465,25 +475,25 @@ func hashmapStringDelete(m *hashmap, key string) {
//go:linkname valueInterfaceUnsafe reflect.valueInterfaceUnsafe //go:linkname valueInterfaceUnsafe reflect.valueInterfaceUnsafe
func valueInterfaceUnsafe(v reflect.Value) interface{} func valueInterfaceUnsafe(v reflect.Value) interface{}
func hashmapFloat32Hash(ptr unsafe.Pointer) uint32 { func hashmapFloat32Hash(ptr unsafe.Pointer, seed uintptr) uint32 {
f := *(*uint32)(ptr) f := *(*uint32)(ptr)
if f == 0x80000000 { if f == 0x80000000 {
// convert -0 to 0 for hashing // convert -0 to 0 for hashing
f = 0 f = 0
} }
return hash32(unsafe.Pointer(&f), 4) return hash32(unsafe.Pointer(&f), 4, seed)
} }
func hashmapFloat64Hash(ptr unsafe.Pointer) uint32 { func hashmapFloat64Hash(ptr unsafe.Pointer, seed uintptr) uint32 {
f := *(*uint64)(ptr) f := *(*uint64)(ptr)
if f == 0x8000000000000000 { if f == 0x8000000000000000 {
// convert -0 to 0 for hashing // convert -0 to 0 for hashing
f = 0 f = 0
} }
return hash32(unsafe.Pointer(&f), 8) return hash32(unsafe.Pointer(&f), 8, seed)
} }
func hashmapInterfaceHash(itf interface{}) uint32 { func hashmapInterfaceHash(itf interface{}, seed uintptr) uint32 {
x := reflect.ValueOf(itf) x := reflect.ValueOf(itf)
if x.RawType() == 0 { if x.RawType() == 0 {
return 0 // nil interface return 0 // nil interface
@@ -498,41 +508,41 @@ func hashmapInterfaceHash(itf interface{}) uint32 {
switch x.RawType().Kind() { switch x.RawType().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return hash32(ptr, x.RawType().Size()) return hash32(ptr, x.RawType().Size(), seed)
case reflect.Bool, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: case reflect.Bool, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return hash32(ptr, x.RawType().Size()) return hash32(ptr, x.RawType().Size(), seed)
case reflect.Float32: case reflect.Float32:
// It should be possible to just hash the contents. However, NaN != NaN // It should be possible to just hash the contents. However, NaN != NaN
// so if you're using lots of NaNs as map keys (you shouldn't) then hash // so if you're using lots of NaNs as map keys (you shouldn't) then hash
// time may become quadratic. To fix that, it would be better to // time may become quadratic. To fix that, it would be better to
// return a random number instead: // return a random number instead:
// https://research.swtch.com/randhash // https://research.swtch.com/randhash
return hashmapFloat32Hash(ptr) return hashmapFloat32Hash(ptr, seed)
case reflect.Float64: case reflect.Float64:
return hashmapFloat64Hash(ptr) return hashmapFloat64Hash(ptr, seed)
case reflect.Complex64: case reflect.Complex64:
rptr, iptr := ptr, unsafe.Pointer(uintptr(ptr)+4) rptr, iptr := ptr, unsafe.Pointer(uintptr(ptr)+4)
return hashmapFloat32Hash(rptr) ^ hashmapFloat32Hash(iptr) return hashmapFloat32Hash(rptr, seed) ^ hashmapFloat32Hash(iptr, seed)
case reflect.Complex128: case reflect.Complex128:
rptr, iptr := ptr, unsafe.Pointer(uintptr(ptr)+8) rptr, iptr := ptr, unsafe.Pointer(uintptr(ptr)+8)
return hashmapFloat64Hash(rptr) ^ hashmapFloat64Hash(iptr) return hashmapFloat64Hash(rptr, seed) ^ hashmapFloat64Hash(iptr, seed)
case reflect.String: case reflect.String:
return hashmapStringHash(x.String()) return hashmapStringHash(x.String(), seed)
case reflect.Chan, reflect.Ptr, reflect.UnsafePointer: case reflect.Chan, reflect.Ptr, reflect.UnsafePointer:
// It might seem better to just return the pointer, but that won't // It might seem better to just return the pointer, but that won't
// result in an evenly distributed hashmap. Instead, hash the pointer // result in an evenly distributed hashmap. Instead, hash the pointer
// like most other types. // like most other types.
return hash32(ptr, x.RawType().Size()) return hash32(ptr, x.RawType().Size(), seed)
case reflect.Array: case reflect.Array:
var hash uint32 var hash uint32
for i := 0; i < x.Len(); i++ { for i := 0; i < x.Len(); i++ {
hash ^= hashmapInterfaceHash(valueInterfaceUnsafe(x.Index(i))) hash ^= hashmapInterfaceHash(valueInterfaceUnsafe(x.Index(i)), seed)
} }
return hash return hash
case reflect.Struct: case reflect.Struct:
var hash uint32 var hash uint32
for i := 0; i < x.NumField(); i++ { for i := 0; i < x.NumField(); i++ {
hash ^= hashmapInterfaceHash(valueInterfaceUnsafe(x.Field(i))) hash ^= hashmapInterfaceHash(valueInterfaceUnsafe(x.Field(i)), seed)
} }
return hash return hash
default: default:
@@ -541,9 +551,9 @@ func hashmapInterfaceHash(itf interface{}) uint32 {
} }
} }
func hashmapInterfacePtrHash(iptr unsafe.Pointer, size uintptr) uint32 { func hashmapInterfacePtrHash(iptr unsafe.Pointer, size uintptr, seed uintptr) uint32 {
_i := *(*_interface)(iptr) _i := *(*_interface)(iptr)
return hashmapInterfaceHash(_i) return hashmapInterfaceHash(_i, seed)
} }
func hashmapInterfaceEqual(x, y unsafe.Pointer, n uintptr) bool { func hashmapInterfaceEqual(x, y unsafe.Pointer, n uintptr) bool {
@@ -551,16 +561,23 @@ func hashmapInterfaceEqual(x, y unsafe.Pointer, n uintptr) bool {
} }
func hashmapInterfaceSet(m *hashmap, key interface{}, value unsafe.Pointer) { func hashmapInterfaceSet(m *hashmap, key interface{}, value unsafe.Pointer) {
hash := hashmapInterfaceHash(key) hash := hashmapInterfaceHash(key, m.seed)
hashmapSet(m, unsafe.Pointer(&key), value, hash) hashmapSet(m, unsafe.Pointer(&key), value, hash)
} }
func hashmapInterfaceGet(m *hashmap, key interface{}, value unsafe.Pointer, valueSize uintptr) bool { func hashmapInterfaceGet(m *hashmap, key interface{}, value unsafe.Pointer, valueSize uintptr) bool {
hash := hashmapInterfaceHash(key) if m == nil {
memzero(value, uintptr(valueSize))
return false
}
hash := hashmapInterfaceHash(key, m.seed)
return hashmapGet(m, unsafe.Pointer(&key), value, valueSize, hash) return hashmapGet(m, unsafe.Pointer(&key), value, valueSize, hash)
} }
func hashmapInterfaceDelete(m *hashmap, key interface{}) { func hashmapInterfaceDelete(m *hashmap, key interface{}) {
hash := hashmapInterfaceHash(key) if m == nil {
return
}
hash := hashmapInterfaceHash(key, m.seed)
hashmapDelete(m, unsafe.Pointer(&key), hash) hashmapDelete(m, unsafe.Pointer(&key), hash)
} }