From 95721a8d8cf6a07ecfb9b97d270e605c2bc3f8fd Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 4 Aug 2019 12:07:22 +0200 Subject: [PATCH] reflect: add support for named types With this change, it becomes possible to get the element type of named slices, pointers, and channels. This is a prerequisite to enable the common named struct types. There's more to come. --- compiler/reflect.go | 217 ++++++++++++++++++++++++++++++++------ src/reflect/sidetables.go | 31 ++++++ src/reflect/type.go | 8 +- testdata/reflect.go | 5 + testdata/reflect.txt | 21 ++++ 5 files changed, 246 insertions(+), 36 deletions(-) create mode 100644 src/reflect/sidetables.go diff --git a/compiler/reflect.go b/compiler/reflect.go index cc96b6f1..3af0bfb4 100644 --- a/compiler/reflect.go +++ b/compiler/reflect.go @@ -1,5 +1,32 @@ package compiler +// This file has some compiler support for run-time reflection using the reflect +// package. In particular, it encodes type information in type codes in such a +// way that the reflect package can decode the type from this information. +// Where needed, it also adds some side tables for looking up more information +// about a type, when that information cannot be stored directly in the type +// code. +// +// Go has 26 different type kinds. +// +// Type kinds are subdivided in basic types (see the list of basicTypes below) +// that are mostly numeric literals and non-basic (or "complex") types that are +// more difficult to encode. These non-basic types come in two forms: +// * Prefix types (pointer, slice, interface, channel): these just add +// something to an existing type. For example, a pointer like *int just adds +// the fact that it's a pointer to an existing type (int). +// These are encoded efficiently by adding a prefix to a type code. +// * Types with multiple fields (struct, array, func, map). All of these have +// multiple fields contained within. Most obviously structs can contain many +// types as fields. 
Also arrays contain not just the element type but also +// the length parameter which can be any arbitrary number and thus may not +// fit in a type code. +// These types are encoded using side tables. +// +// This distinction is also important for how named types are encoded. At the +// moment, named basic types just get a unique number assigned while named +// non-basic types have their underlying type stored in a sidetable. + import ( "math/big" "strings" @@ -7,6 +34,8 @@ import ( "tinygo.org/x/go-llvm" ) +// A list of basic types and their numbers. This list should be kept in sync +// with the list of Kind constants of type.go in the reflect package. var basicTypes = map[string]int64{ "bool": 1, "int": 2, @@ -28,6 +57,44 @@ var basicTypes = map[string]int64{ "unsafeptr": 18, } +// typeCodeAssignmentState keeps some global state around for type code +// assignments, used to assign one unique type code to each Go type. +type typeCodeAssignmentState struct { + // An integer that's incremented each time it's used to give unique IDs to + // type codes that are not yet fully supported otherwise by the reflect + // package (or are simply unused in the compiled program). + fallbackIndex int + + // Map of named types to their type code. It is important that named types + // get unique IDs for each type. + namedBasicTypes map[string]int + namedNonBasicTypes map[string]int + + // This byte array is stored in reflect.namedNonBasicTypesSidetable and is + // used at runtime to get details about a named non-basic type. + // Entries are varints (see makeVarint below and readVarint in + // reflect/sidetables.go for the encoding): one varint per entry. The + // integers in namedNonBasicTypes are indices into this array. Because these + // are varints, most type codes are really small (just one byte). + // + // Note that this byte buffer is not created when it is not needed + // (reflect.namedNonBasicTypesSidetable has no uses), see + // needsNamedNonBasicTypesSidetable. 
+ namedNonBasicTypesSidetable []byte + + // This is the length of an uintptr. Only used occasionally to know whether + // a given number can be encoded as a varint. + uintptrLen int + + // This indicates whether namedNonBasicTypesSidetable needs to be created at + // all. If it is false, namedNonBasicTypesSidetable will contain simple + // monotonically increasing numbers. + needsNamedNonBasicTypesSidetable bool +} + +// assignTypeCodes is used to assign a type code to each type in the program +// that is ever stored in an interface. It tries to use the smallest possible +// numbers to make the code that works with interfaces as small as possible. func (c *Compiler) assignTypeCodes(typeSlice typeInfoSlice) { fn := c.mod.NamedFunction("reflect.ValueOf") if fn.IsNil() { @@ -40,10 +107,15 @@ func (c *Compiler) assignTypeCodes(typeSlice typeInfoSlice) { } // Assign typecodes the way the reflect package expects. - fallbackIndex := 1 - namedTypes := make(map[string]int) + state := typeCodeAssignmentState{ + fallbackIndex: 1, + namedBasicTypes: make(map[string]int), + namedNonBasicTypes: make(map[string]int), + uintptrLen: c.uintptrType.IntTypeWidth(), + needsNamedNonBasicTypesSidetable: len(getUses(c.mod.NamedGlobal("reflect.namedNonBasicTypesSidetable"))) != 0, + } for _, t := range typeSlice { - num := c.getTypeCodeNum(t.typecode, &fallbackIndex, namedTypes) + num := c.getTypeCodeNum(t.typecode, &state) if num.BitLen() > c.uintptrType.IntTypeWidth() || !num.IsUint64() { // TODO: support this in some way, using a side table for example. // That's less efficient but better than not working at all. @@ -53,12 +125,32 @@ func (c *Compiler) assignTypeCodes(typeSlice typeInfoSlice) { } t.num = num.Uint64() } + + // Only create this sidetable when it is necessary. + if state.needsNamedNonBasicTypesSidetable { + // Create the sidetable and replace the old dummy global with this value. 
+ globalType := llvm.ArrayType(c.ctx.Int8Type(), len(state.namedNonBasicTypesSidetable)) + global := llvm.AddGlobal(c.mod, globalType, "reflect.namedNonBasicTypesSidetable.tmp") + value := llvm.Undef(globalType) + for i, ch := range state.namedNonBasicTypesSidetable { + value = llvm.ConstInsertValue(value, llvm.ConstInt(c.ctx.Int8Type(), uint64(ch), false), []uint32{uint32(i)}) + } + global.SetInitializer(value) + oldGlobal := c.mod.NamedGlobal("reflect.namedNonBasicTypesSidetable") + gep := llvm.ConstGEP(global, []llvm.Value{ + llvm.ConstInt(c.ctx.Int32Type(), 0, false), + llvm.ConstInt(c.ctx.Int32Type(), 0, false), + }) + oldGlobal.ReplaceAllUsesWith(gep) + oldGlobal.EraseFromParentAsGlobal() + global.SetName("reflect.namedNonBasicTypesSidetable") + } } // getTypeCodeNum returns the typecode for a given type as expected by the // reflect package. Also see getTypeCodeName, which serializes types to a string // based on a types.Type value for this function. -func (c *Compiler) getTypeCodeNum(typecode llvm.Value, fallbackIndex *int, namedTypes map[string]int) *big.Int { +func (c *Compiler) getTypeCodeNum(typecode llvm.Value, state *typeCodeAssignmentState) *big.Int { // Note: see src/reflect/type.go for bit allocations. class, value := getClassAndValueFromTypeCode(typecode) name := "" @@ -78,51 +170,51 @@ func (c *Compiler) getTypeCodeNum(typecode llvm.Value, fallbackIndex *int, named } if name != "" { // This type is named, set the upper bits to the name ID. - num |= int64(getNamedTypeNum(namedTypes, name)) << 5 + num |= int64(state.getBasicNamedTypeNum(name)) << 5 } return big.NewInt(num << 1) } else { - // Complex types use the following bit pattern: + // Non-basic types use the following bit pattern: // ...nxxx1 - // where xxx indicates the complex type (any non-basic type). The upper - // bits contain whatever the type contains. Types that wrap a single - // other type (channel, interface, pointer, slice) just contain the bits - // of the wrapped type. 
Other types (like struct) have a different - // method of encoding the contents of the type. + // where xxx indicates the non-basic type. The upper bits contain + // whatever the type contains. Types that wrap a single other type + // (channel, interface, pointer, slice) just contain the bits of the + // wrapped type. Other types (like struct) need more fields and thus + // cannot be encoded as a simple prefix. var num *big.Int var classNumber int64 switch class { case "chan": sub := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}) - num = c.getTypeCodeNum(sub, fallbackIndex, namedTypes) + num = c.getTypeCodeNum(sub, state) classNumber = 0 case "interface": - num = big.NewInt(int64(*fallbackIndex)) - *fallbackIndex++ + num = big.NewInt(int64(state.fallbackIndex)) + state.fallbackIndex++ classNumber = 1 case "pointer": sub := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}) - num = c.getTypeCodeNum(sub, fallbackIndex, namedTypes) + num = c.getTypeCodeNum(sub, state) classNumber = 2 case "slice": sub := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}) - num = c.getTypeCodeNum(sub, fallbackIndex, namedTypes) + num = c.getTypeCodeNum(sub, state) classNumber = 3 case "array": - num = big.NewInt(int64(*fallbackIndex)) - *fallbackIndex++ + num = big.NewInt(int64(state.fallbackIndex)) + state.fallbackIndex++ classNumber = 4 case "func": - num = big.NewInt(int64(*fallbackIndex)) - *fallbackIndex++ + num = big.NewInt(int64(state.fallbackIndex)) + state.fallbackIndex++ classNumber = 5 case "map": - num = big.NewInt(int64(*fallbackIndex)) - *fallbackIndex++ + num = big.NewInt(int64(state.fallbackIndex)) + state.fallbackIndex++ classNumber = 6 case "struct": - num = big.NewInt(int64(*fallbackIndex)) - *fallbackIndex++ + num = big.NewInt(int64(state.fallbackIndex)) + state.fallbackIndex++ classNumber = 7 default: panic("unknown type kind: " + class) @@ -130,8 +222,7 @@ func (c *Compiler) getTypeCodeNum(typecode llvm.Value, fallbackIndex *int, 
named if name == "" { num.Lsh(num, 5).Or(num, big.NewInt((classNumber<<1)+1)) } else { - // TODO: store num in a sidetable - num = big.NewInt(int64(getNamedTypeNum(namedTypes, name))<<1 | 1) + num = big.NewInt(int64(state.getNonBasicNamedTypeNum(name, num))<<1 | 1) num.Lsh(num, 4).Or(num, big.NewInt((classNumber<<1)+1)) } return num @@ -157,15 +248,71 @@ func getClassAndValueFromTypeCode(typecode llvm.Value) (class, value string) { return } -// getNamedTypeNum returns an appropriate (unique) number for the given named -// type. If the name already has a number that number is returned, else a new -// number is returned. The number is always non-zero. -func getNamedTypeNum(namedTypes map[string]int, name string) int { - if num, ok := namedTypes[name]; ok { - return num - } else { - num = len(namedTypes) + 1 - namedTypes[name] = num +// getBasicNamedTypeNum returns an appropriate (unique) number for the given +// named type. If the name already has a number that number is returned, else a +// new number is returned. The number is always non-zero. +func (state *typeCodeAssignmentState) getBasicNamedTypeNum(name string) int { + if num, ok := state.namedBasicTypes[name]; ok { return num } + num := len(state.namedBasicTypes) + 1 + state.namedBasicTypes[name] = num + return num +} + +// getNonBasicNamedTypeNum returns a number unique for this named type. It tries +// to return the smallest number possible to make encoding of this type code +// easier. +func (state *typeCodeAssignmentState) getNonBasicNamedTypeNum(name string, value *big.Int) int { + if num, ok := state.namedNonBasicTypes[name]; ok { + return num + } + if !state.needsNamedNonBasicTypesSidetable { + // Use simple small integers in this case, to make these numbers + // smaller. 
+ num := len(state.namedNonBasicTypes) + 1 + state.namedNonBasicTypes[name] = num + return num + } + num := len(state.namedNonBasicTypesSidetable) + if value.BitLen() > state.uintptrLen || !value.IsUint64() { + panic("cannot store value in sidetable") + } + state.namedNonBasicTypesSidetable = append(state.namedNonBasicTypesSidetable, makeVarint(value.Uint64())...) + state.namedNonBasicTypes[name] = num + return num +} + +// makeVarint encodes a varint in a way that should be easy to decode. +// It may need to be decoded very quickly at runtime at low-powered processors +// so should be efficient to decode. +// The current algorithm is probably not even close to efficient, but it is easy +// to change as the format is only used inside the same program. +func makeVarint(n uint64) []byte { + // This is the reverse of what readVarint in src/reflect/sidetables.go does: the low bit is set on every byte except the last. + buf := make([]byte, 0, 8) + for { + c := byte(n & 0x7f << 1) + n >>= 7 + if len(buf) != 0 { + c |= 1 + } + buf = append(buf, c) + if n == 0 { + break + } + } + reverseBytes(buf) + return buf +} + +func reverseBytes(s []byte) { + // Actually copied from https://blog.golang.org/why-generics + first := 0 + last := len(s) - 1 + for first < last { + s[first], s[last] = s[last], s[first] + first++ + last-- + } } diff --git a/src/reflect/sidetables.go b/src/reflect/sidetables.go new file mode 100644 index 00000000..75fb80b1 --- /dev/null +++ b/src/reflect/sidetables.go @@ -0,0 +1,31 @@ +package reflect + +import ( + "unsafe" +) + +// This stores a varint for each named type. Named types are identified by their +// name instead of by their type. The named types stored in this struct are the +// simpler non-basic types: pointer, struct, and channel. +//go:extern reflect.namedNonBasicTypesSidetable +var namedNonBasicTypesSidetable byte + +func readVarint(buf unsafe.Pointer) Type { + var t Type + for { + // Read the next byte. + c := *(*byte)(buf) + + // Add this byte to the type code. The upper 7 bits are the value. 
+ t = t<<7 | Type(c>>1) + + // Check whether this is the last byte of this varint. The lower bit + // indicates whether any bytes follow. + if c&1 == 0 { + return t + } + + // Increment the buf pointer (pointer arithmetic!). + buf = unsafe.Pointer(uintptr(buf) + 1) + } +} diff --git a/src/reflect/type.go b/src/reflect/type.go index 8d1ee977..f921e4cf 100644 --- a/src/reflect/type.go +++ b/src/reflect/type.go @@ -145,9 +145,15 @@ func (t Type) Kind() Kind { func (t Type) Elem() Type { switch t.Kind() { case Chan, Ptr, Slice: + // Look at the 'n' bit in the type code (see the top of this file) to + // see whether this is a named type. if (t>>4)%2 != 0 { - panic("unimplemented: (reflect.Type).Elem() for named types") + // This is a named type. The element type is stored in a sidetable. + namedTypeNum := t >> 5 + return readVarint(unsafe.Pointer(uintptr(unsafe.Pointer(&namedNonBasicTypesSidetable)) + uintptr(namedTypeNum))) } + // Not a named type, so the element type is stored directly in the type + // code. 
return t >> 5 default: // not implemented: Array, Map panic("unimplemented: (reflect.Type).Elem()") diff --git a/testdata/reflect.go b/testdata/reflect.go index 960d1c08..80c4bd44 100644 --- a/testdata/reflect.go +++ b/testdata/reflect.go @@ -9,6 +9,8 @@ type ( myint int myslice []byte myslice2 []myint + mychan chan int + myptr *int ) func main() { @@ -55,10 +57,12 @@ func main() { unsafe.Pointer(new(int)), // channels zeroChan, + mychan(zeroChan), // pointers new(int), new(error), &n, + myptr(new(int)), // slices []byte{1, 2, 3}, make([]uint8, 2, 5), @@ -70,6 +74,7 @@ func main() { []float64{1, 1.64}, []complex64{1, 1.64 + 0.3i}, []complex128{1, 1.128 + 0.4i}, + myslice{5, 3, 11}, // array [4]int{1, 2, 3, 4}, // functions diff --git a/testdata/reflect.txt b/testdata/reflect.txt index 68ab529f..0d89b6dd 100644 --- a/testdata/reflect.txt +++ b/testdata/reflect.txt @@ -66,6 +66,9 @@ reflect type: unsafe.Pointer reflect type: chan chan: int nil: true +reflect type: chan + chan: int + nil: true reflect type: ptr pointer: true int nil: false @@ -82,6 +85,11 @@ reflect type: ptr nil: false reflect type: int settable=true int: 42 +reflect type: ptr + pointer: true int + nil: false + reflect type: int settable=true + int: 0 reflect type: slice slice: uint8 3 3 pointer: true @@ -181,6 +189,19 @@ reflect type: slice indexing: 1 reflect type: complex128 settable=true complex: (+1.128000e+000+4.000000e-001i) +reflect type: slice + slice: uint8 3 3 + pointer: true + nil: false + indexing: 0 + reflect type: uint8 settable=true + uint: 5 + indexing: 1 + reflect type: uint8 settable=true + uint: 3 + indexing: 2 + reflect type: uint8 settable=true + uint: 11 reflect type: array array reflect type: func