Preliminary implementation of a hashmap, unfinished

Missing features:
  * keys other than strings
  * more than 8 values in the hashmap
  * growing a map when needed
  * initial size hint
  * delete(m, key)
  * iterators (for range)
  * initializing global maps
  * ...more?
Этот коммит содержится в:
Ayke van Laethem 2018-08-22 04:50:24 +02:00
родитель 8fb9cd4e23
коммит 3a6ef38041
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: E97FF5335DFDFDED
5 изменённых файлов: 290 добавлений и 21 удалений

Просмотреть файл

@ -46,11 +46,11 @@ Currently supported features:
* standard library (but most packages won't work due to missing language
features)
* slices (partially)
* maps (very rough, unfinished)
Not yet supported:
* float, complex, etc.
* maps
* garbage collection
* defer
* closures

Просмотреть файл

@ -431,6 +431,8 @@ func (c *Compiler) getLLVMType(goType types.Type) (llvm.Type, error) {
}
case *types.Interface:
return c.mod.GetTypeByName("interface"), nil
case *types.Map:
return llvm.PointerType(c.mod.GetTypeByName("runtime.hashmap"), 0), nil
case *types.Named:
if _, ok := typ.Underlying().(*types.Struct); ok {
llvmType := c.mod.GetTypeByName(typ.Obj().Pkg().Path() + "." + typ.Obj().Name())
@ -878,6 +880,36 @@ func (c *Compiler) parseInstr(frame *Frame, instr ssa.Instruction) error {
blockJump := frame.blocks[instr.Block().Succs[0]]
c.builder.CreateBr(blockJump)
return nil
case *ssa.MapUpdate:
m, err := c.parseExpr(frame, instr.Map)
if err != nil {
return err
}
key, err := c.parseExpr(frame, instr.Key)
if err != nil {
return err
}
value, err := c.parseExpr(frame, instr.Value)
if err != nil {
return err
}
mapType := instr.Map.Type().Underlying().(*types.Map)
switch keyType := mapType.Key().Underlying().(type) {
case *types.Basic:
if keyType.Kind() == types.String {
valueAlloca := c.builder.CreateAlloca(value.Type(), "hashmap.value")
c.builder.CreateStore(value, valueAlloca)
valuePtr := c.builder.CreateBitCast(valueAlloca, c.i8ptrType, "hashmap.valueptr")
params := []llvm.Value{m, key, valuePtr}
fn := c.mod.NamedFunction("runtime.hashmapSet")
c.builder.CreateCall(fn, params, "")
return nil
} else {
return errors.New("todo: map update key type: " + keyType.String())
}
default:
return errors.New("todo: map update key type: " + keyType.String())
}
case *ssa.Panic:
value, err := c.parseExpr(frame, instr.X)
if err != nil {
@ -1292,13 +1324,6 @@ func (c *Compiler) parseExpr(frame *Frame, expr ssa.Value) (llvm.Value, error) {
if expr.CommaOk {
return llvm.Value{}, errors.New("todo: lookup with comma-ok")
}
if _, ok := expr.X.Type().(*types.Map); ok {
return llvm.Value{}, errors.New("todo: lookup in map")
}
// Value type must be a string, which is a basic type.
if expr.X.Type().(*types.Basic).Kind() != types.String {
panic("lookup on non-string?")
}
value, err := c.parseExpr(frame, expr.X)
if err != nil {
return llvm.Value{}, nil
@ -1307,21 +1332,50 @@ func (c *Compiler) parseExpr(frame *Frame, expr ssa.Value) (llvm.Value, error) {
if err != nil {
return llvm.Value{}, nil
}
// Bounds check.
// LLVM optimizes this away in most cases.
if frame.fn.llvmFn.Name() != "runtime.lookupBoundsCheck" {
length, err := c.parseBuiltin(frame, []ssa.Value{expr.X}, "len")
if err != nil {
return llvm.Value{}, err // shouldn't happen
switch xType := expr.X.Type().(type) {
case *types.Basic:
// Value type must be a string, which is a basic type.
if xType.Kind() != types.String {
panic("lookup on non-string?")
}
c.builder.CreateCall(c.mod.NamedFunction("runtime.lookupBoundsCheck"), []llvm.Value{length, index}, "")
}
// Lookup byte
buf := c.builder.CreateExtractValue(value, 1, "")
bufPtr := c.builder.CreateGEP(buf, []llvm.Value{index}, "")
return c.builder.CreateLoad(bufPtr, ""), nil
// Bounds check.
// LLVM optimizes this away in most cases.
if frame.fn.llvmFn.Name() != "runtime.lookupBoundsCheck" {
length, err := c.parseBuiltin(frame, []ssa.Value{expr.X}, "len")
if err != nil {
return llvm.Value{}, err // shouldn't happen
}
c.builder.CreateCall(c.mod.NamedFunction("runtime.lookupBoundsCheck"), []llvm.Value{length, index}, "")
}
// Lookup byte
buf := c.builder.CreateExtractValue(value, 1, "")
bufPtr := c.builder.CreateGEP(buf, []llvm.Value{index}, "")
return c.builder.CreateLoad(bufPtr, ""), nil
case *types.Map:
switch keyType := xType.Key().Underlying().(type) {
case *types.Basic:
if keyType.Kind() == types.String {
llvmValueType, err := c.getLLVMType(expr.Type())
if err != nil {
return llvm.Value{}, err
}
mapValueAlloca := c.builder.CreateAlloca(llvmValueType, "hashmap.value")
mapValuePtr := c.builder.CreateBitCast(mapValueAlloca, c.i8ptrType, "hashmap.valueptr")
params := []llvm.Value{value, index, mapValuePtr}
fn := c.mod.NamedFunction("runtime.hashmapGet")
c.builder.CreateCall(fn, params, "")
return c.builder.CreateLoad(mapValueAlloca, ""), nil
} else {
return llvm.Value{}, errors.New("todo: map lookup key type: " + keyType.String())
}
default:
return llvm.Value{}, errors.New("todo: map lookup key type: " + keyType.String())
}
default:
panic("unknown lookup type: " + expr.String())
}
case *ssa.MakeInterface:
val, err := c.parseExpr(frame, expr.X)
if err != nil {
@ -1359,6 +1413,63 @@ func (c *Compiler) parseExpr(frame *Frame, expr ssa.Value) (llvm.Value, error) {
itf := llvm.ConstNamedStruct(c.mod.GetTypeByName("interface"), []llvm.Value{llvm.ConstInt(llvm.Int32Type(), uint64(itfTypeNum), false), llvm.Undef(c.i8ptrType)})
itf = c.builder.CreateInsertValue(itf, itfValue, 1, "")
return itf, nil
case *ssa.MakeMap:
mapType := expr.Type().Underlying().(*types.Map)
llvmKeyType, err := c.getLLVMType(mapType.Key().Underlying())
if err != nil {
return llvm.Value{}, err
}
llvmValueType, err := c.getLLVMType(mapType.Elem().Underlying())
if err != nil {
return llvm.Value{}, err
}
switch keyType := mapType.Key().Underlying().(type) {
case *types.Basic:
if keyType.Kind() == types.String {
// Create hashmap
llvmType := c.mod.GetTypeByName("runtime.hashmap")
size := llvm.ConstInt(c.uintptrType, c.targetData.TypeAllocSize(llvmType), false)
buf := c.builder.CreateCall(c.allocFunc, []llvm.Value{size}, "")
buf = c.builder.CreateBitCast(buf, llvm.PointerType(llvmType, 0), "")
// Set keySize
keySize := c.targetData.TypeAllocSize(llvmKeyType)
keyIndices := []llvm.Value{
llvm.ConstInt(llvm.Int32Type(), 0, false),
llvm.ConstInt(llvm.Int32Type(), 3, false), // keySize uint8
}
keySizePtr := c.builder.CreateGEP(buf, keyIndices, "hashmap.keySize")
c.builder.CreateStore(llvm.ConstInt(llvm.Int8Type(), keySize, false), keySizePtr)
// Set valueSize
valueSize := c.targetData.TypeAllocSize(llvmValueType)
valueIndices := []llvm.Value{
llvm.ConstInt(llvm.Int32Type(), 0, false),
llvm.ConstInt(llvm.Int32Type(), 4, false), // valueSize uint8
}
valueSizePtr := c.builder.CreateGEP(buf, valueIndices, "hashmap.valueSize")
c.builder.CreateStore(llvm.ConstInt(llvm.Int8Type(), valueSize, false), valueSizePtr)
// Create initial bucket
bucketType := c.mod.GetTypeByName("runtime.hashmapBucket")
bucketSize := c.targetData.TypeAllocSize(bucketType) + keySize*8 + valueSize*8
bucketSizeValue := llvm.ConstInt(c.uintptrType, bucketSize, false)
bucket := c.builder.CreateCall(c.allocFunc, []llvm.Value{bucketSizeValue}, "")
// Set initial bucket
bucketIndices := []llvm.Value{
llvm.ConstInt(llvm.Int32Type(), 0, false),
llvm.ConstInt(llvm.Int32Type(), 1, false), // buckets unsafe.Pointer
}
bucketsElementPtr := c.builder.CreateGEP(buf, bucketIndices, "hashmap.buckets")
c.builder.CreateStore(bucket, bucketsElementPtr)
return buf, nil
} else {
return llvm.Value{}, errors.New("todo: map key type: " + keyType.String())
}
default:
return llvm.Value{}, errors.New("todo: map key type: " + keyType.String())
}
case *ssa.Phi:
t, err := c.getLLVMType(expr.Type())
if err != nil {

Просмотреть файл

@ -23,6 +23,9 @@ func main() {
println("sumrange(100) =", sumrange(100))
println("strlen foo:", strlen("foo"))
m := map[string]int{"answer": 42, "foo": 3}
readMap(m, "answer")
foo := []int{1, 2, 4, 5}
println("len/cap foo:", len(foo), cap(foo))
println("foo[3]:", foo[3])
@ -46,6 +49,10 @@ func runFunc(f func(int), arg int) {
f(arg)
}
// readMap looks up key in m and prints the key together with the value that
// the lookup produced.
func readMap(m map[string]int, key string) {
	value := m[key]
	println("map read:", key, "=", value)
}
// hello prints a fixed greeting followed by n.
// NOTE(review): judging by the message it is presumably invoked indirectly as
// a function value (e.g. through runFunc) — confirm against main.
func hello(n int) {
println("hello from function pointer:", n)
}

133
src/runtime/hashmap.go Обычный файл
Просмотреть файл

@ -0,0 +1,133 @@
package runtime
// This is a hashmap implementation for the map[T]T type.
// It is very roughly based on the implementation of the Go hashmap:
//
// https://golang.org/src/runtime/hashmap.go
import (
"unsafe"
)
// The underlying hashmap structure for Go.
//
// The compiler's map-creation code addresses these fields by index (buckets
// is field 1, keySize field 3, valueSize field 4), so the field order must
// stay in sync with the compiler.
type hashmap struct {
next *hashmap // hashmap after evacuate (for iterators)
buckets unsafe.Pointer // pointer to array of buckets
// presumably the number of key/value pairs stored in the map — TODO confirm
count uint
// Size of one key in bytes, stored by the compiler when the map is created.
keySize uint8 // maybe this can store the key type as well? E.g. keysize == 5 means string?
// Size of one value in bytes, stored by the compiler when the map is created.
valueSize uint8
// The bucket array holds 1 << bucketBits buckets; the low bucketBits bits of
// a key's hash select the bucket.
bucketBits uint8
}
// A hashmap bucket. A bucket is a container of 8 key/value pairs: first the
// following two entries, then the 8 keys, then the 8 values. This somewhat odd
// ordering is to make sure the keys and values are well aligned when one of
// them is smaller than the system word size.
type hashmapBucket struct {
// One byte per slot, taken from the top 8 bits of the key's hash. The value 0
// marks an empty slot, so a hash whose top byte is 0 is stored as 1 instead.
tophash [8]uint8
next *hashmapBucket // next bucket (if there are more than 8 in a chain)
// Followed by the actual keys, and then the actual values. These are
// allocated but as they're of variable size they can't be shown here.
}
// Compute the 32-bit FNV-1a hash of the string s points to.
//
// Every byte of the string is XORed into the running hash, which is then
// multiplied by the FNV prime. The empty string hashes to the offset basis.
//
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
func stringhash(s *string) uint32 {
	const (
		offsetBasis uint32 = 2166136261
		prime       uint32 = 16777619
	)
	str := *s
	hash := offsetBasis
	for pos := 0; pos != len(str); pos++ {
		hash = (hash ^ uint32(str[pos])) * prime
	}
	return hash
}
// Set a specified key to a given value.
//
// m is the map to update, key is the string key and value points to
// m.valueSize bytes that are copied into the map. If the key is already
// present its value is overwritten; otherwise the pair is stored in the first
// free slot found in the bucket chain. Growing the map is not implemented yet:
// when every slot in the chain is occupied this function panics.
func hashmapSet(m *hashmap, key string, value unsafe.Pointer) {
	hash := stringhash(&key)
	numBuckets := uintptr(1) << m.bucketBits
	bucketNumber := uintptr(hash) & (numBuckets - 1)

	// A bucket is laid out as: header, 8 keys, 8 values.
	headerSize := unsafe.Sizeof(hashmapBucket{})
	keySize := uintptr(m.keySize)
	valueSize := uintptr(m.valueSize)
	bucketSize := headerSize + keySize*8 + valueSize*8
	bucket := (*hashmapBucket)(unsafe.Pointer(uintptr(m.buckets) + bucketSize*bucketNumber))

	tophash := uint8(hash >> 24)
	if tophash < 1 {
		// 0 means empty slot, so make it bigger.
		tophash += 1
	}

	// See whether the key already exists somewhere, remembering the first
	// empty slot along the way in case it does not.
	var emptySlotKey *string
	var emptySlotValue unsafe.Pointer
	var emptySlotTophash *byte
	for bucket != nil {
		for i := uintptr(0); i < 8; i++ {
			// Slot addresses are derived from the bucket currently being
			// inspected, so that buckets further down the chain address
			// their own keys and values rather than those of the first one.
			slotKey := (*string)(unsafe.Pointer(uintptr(unsafe.Pointer(bucket)) + headerSize + keySize*i))
			slotValue := unsafe.Pointer(uintptr(unsafe.Pointer(bucket)) + headerSize + keySize*8 + valueSize*i)
			if bucket.tophash[i] == 0 && emptySlotKey == nil {
				// Found an empty slot, store it for if we couldn't find an
				// existing slot.
				emptySlotKey = slotKey
				emptySlotValue = slotValue
				emptySlotTophash = &bucket.tophash[i]
			}
			if bucket.tophash[i] == tophash {
				// Could be an existing value that's the same.
				if key == *slotKey {
					// Found the same key, replace its value.
					memcpy(slotValue, value, valueSize)
					return
				}
			}
		}
		bucket = bucket.next
	}

	if emptySlotKey != nil {
		// The key is new: claim the empty slot and account for the entry.
		*emptySlotKey = key
		memcpy(emptySlotValue, value, valueSize)
		*emptySlotTophash = tophash
		m.count++
		return
	}
	panic("todo: hashmap: grow bucket")
}
// Get the value of a specified key, or zero the value if not found.
//
// m is the map to search, key is the string key and value points to a buffer
// of m.valueSize bytes that receives the result: a copy of the stored value
// when the key is present, or all zero bytes when it is not.
func hashmapGet(m *hashmap, key string, value unsafe.Pointer) {
	hash := stringhash(&key)
	numBuckets := uintptr(1) << m.bucketBits
	bucketNumber := uintptr(hash) & (numBuckets - 1)

	// A bucket is laid out as: header, 8 keys, 8 values.
	headerSize := unsafe.Sizeof(hashmapBucket{})
	keySize := uintptr(m.keySize)
	valueSize := uintptr(m.valueSize)
	bucketSize := headerSize + keySize*8 + valueSize*8
	bucket := (*hashmapBucket)(unsafe.Pointer(uintptr(m.buckets) + bucketSize*bucketNumber))

	tophash := uint8(hash >> 24)
	if tophash < 1 {
		// 0 means empty slot, so make it bigger.
		tophash += 1
	}

	// Try to find the key.
	for bucket != nil {
		for i := uintptr(0); i < 8; i++ {
			if bucket.tophash[i] == tophash {
				// This could be the key we're looking for. The slot address
				// is derived from the bucket currently being inspected, so
				// chained buckets address their own keys and values.
				slotKey := (*string)(unsafe.Pointer(uintptr(unsafe.Pointer(bucket)) + headerSize + keySize*i))
				if key == *slotKey {
					// Found the key, copy its value out.
					slotValue := unsafe.Pointer(uintptr(unsafe.Pointer(bucket)) + headerSize + keySize*8 + valueSize*i)
					memcpy(value, slotValue, valueSize)
					return
				}
			}
		}
		bucket = bucket.next
	}

	// Did not find the key.
	memzero(value, valueSize)
}

Просмотреть файл

@ -1,5 +1,9 @@
package runtime
import (
"unsafe"
)
const Compiler = "tgo"
// The bitness of the CPU (e.g. 8, 32, 64). Set by the compiler as a constant.
@ -29,6 +33,20 @@ func stringequal(x, y string) bool {
return true
}
// Copy size bytes from src to dst, one byte at a time in ascending address
// order. The memory areas must not overlap.
func memcpy(dst, src unsafe.Pointer, size uintptr) {
	var off uintptr
	for off < size {
		from := (*uint8)(unsafe.Pointer(uintptr(src) + off))
		to := (*uint8)(unsafe.Pointer(uintptr(dst) + off))
		*to = *from
		off++
	}
}
// Set the given number of bytes to zero.
//
// ptr points to the first byte to clear and size is the number of bytes to
// clear, so the bytes at offsets 0 through size-1 are overwritten. A size of
// 0 leaves memory untouched.
func memzero(ptr unsafe.Pointer, size uintptr) {
	for i := uintptr(0); i < size; i++ {
		// Offset by the loop counter: offsetting by size would leave the
		// buffer untouched and repeatedly write one byte past its end.
		*(*byte)(unsafe.Pointer(uintptr(ptr) + i)) = 0
	}
}
func _panic(message interface{}) {
printstring("panic: ")
printitf(message)