tinygo/transform/reflect.go

package transform

// This file has some compiler support for run-time reflection using the reflect
// package. In particular, it encodes type information in type codes in such a
// way that the reflect package can decode the type from this information.
// Where needed, it also adds some side tables for looking up more information
// about a type, when that information cannot be stored directly in the type
// code.
//
// Go has 26 different type kinds.
//
// Type kinds are subdivided in basic types (see the list of basicTypes below)
// that are mostly numeric literals and non-basic (or "complex") types that are
// more difficult to encode. These non-basic types come in two forms:
//   * Prefix types (pointer, slice, interface, channel): these just add
//     something to an existing type. For example, a pointer like *int just adds
//     the fact that it's a pointer to an existing type (int).
//     These are encoded efficiently by adding a prefix to a type code.
//   * Types with multiple fields (struct, array, func, map). All of these have
//     multiple fields contained within. Most obviously structs can contain many
//     types as fields. Also arrays contain not just the element type but also
//     the length parameter which can be any arbitrary number and thus may not
//     fit in a type code.
//     These types are encoded using side tables.
//
// This distinction is also important for how named types are encoded. At the
// moment, named basic types just get a unique number assigned while named
// non-basic types have their underlying type stored in a sidetable.

import (
	"encoding/binary"
	"go/ast"
	"math/big"
	"sort"
	"strings"

	"tinygo.org/x/go-llvm"
)

// A list of basic types and their numbers, keyed by the type name as it
// appears in the typecode global name. This list should be kept in sync with
// the list of Kind constants of type.go in the reflect package.
//
// getTypeCodeNum looks up the number here and shifts it left by one bit to
// form the type code of a basic type.
var basicTypes = map[string]int64{
	"bool":       1,
	"int":        2,
	"int8":       3,
	"int16":      4,
	"int32":      5,
	"int64":      6,
	"uint":       7,
	"uint8":      8,
	"uint16":     9,
	"uint32":     10,
	"uint64":     11,
	"uintptr":    12,
	"float32":    13,
	"float64":    14,
	"complex64":  15,
	"complex128": 16,
	"string":     17,
	"unsafeptr":  18,
}

// A list of non-basic types, keyed by the class name as it appears in the
// typecode global name. Adding 19 to this number will give the Kind as used in
// src/reflect/type.go, and it must be kept in sync with that list.
//
// getTypeCodeNum stores this number in the low bits of the type code of a
// non-basic type (shifted left by one, with the lowest bit set).
var nonBasicTypes = map[string]int64{
	"chan":      0,
	"interface": 1,
	"pointer":   2,
	"slice":     3,
	"array":     4,
	"func":      5,
	"map":       6,
	"struct":    7,
}

// typeCodeAssignmentState keeps some global state around for type code
// assignments, used to assign one unique type code to each Go type.
type typeCodeAssignmentState struct {
	// An integer that's incremented each time it's used to give unique IDs to
	// type codes that are not yet fully supported otherwise by the reflect
	// package (or are simply unused in the compiled program).
	fallbackIndex int

	// This is the length of an uintptr in bits. It is used to check whether a
	// computed type code number still fits in a uintptr.
	uintptrLen int

	// Map of named types to their type code. It is important that named types
	// get unique IDs for each type.
	namedBasicTypes    map[string]int
	namedNonBasicTypes map[string]int

	// Map of array types (keyed by typecode global name) to their type
	// number, the sidetable bytes built so far, and whether
	// reflect.arrayTypesSidetable is used at all.
	arrayTypes               map[string]int
	arrayTypesSidetable      []byte
	needsArrayTypesSidetable bool

	// Map of struct types (keyed by typecode global name) to their type
	// number, and the struct types sidetable bytes built so far.
	// Note: the flag in this group tracks whether
	// reflect.structNamesSidetable (not structTypesSidetable) has uses.
	structTypes               map[string]int
	structTypesSidetable      []byte
	needsStructNamesSidetable bool

	// Map of struct field names and tags to their offset in the struct names
	// sidetable, and the struct names sidetable bytes built so far.
	// Note: the flag in this group tracks whether
	// reflect.structTypesSidetable (not structNamesSidetable) has uses.
	structNames               map[string]int
	structNamesSidetable      []byte
	needsStructTypesSidetable bool

	// This slice is stored in reflect.namedNonBasicTypesSidetable and is
	// used at runtime to get details about a named non-basic type.
	// There is one uint64 element per named non-basic type, holding the type
	// code number of the underlying type. The integers in namedNonBasicTypes
	// are indices into this slice.
	//
	// Note that this slice is not filled when it is not needed
	// (reflect.namedNonBasicTypesSidetable has no uses), see
	// needsNamedNonBasicTypesSidetable.
	namedNonBasicTypesSidetable []uint64

	// This indicates whether namedNonBasicTypesSidetable needs to be created at
	// all. If it is false, namedNonBasicTypes will contain simple
	// monotonically increasing numbers (starting at 1) instead of indices.
	needsNamedNonBasicTypesSidetable bool
}

// LowerReflect is used to assign a type code to each type in the program
// that is ever stored in an interface. It tries to use the smallest possible
// numbers to make the code that works with interfaces as small as possible.
//
// It works in four steps:
//  1. collect every global whose name starts with "reflect/types.type:";
//  2. compute a type code number for each of them and replace the constant
//     expression uses of the global with that number;
//  3. emit the reflect sidetables that have any uses in the module;
//  4. clear the initializers of the typecode globals (and erase the struct
//     field arrays they reference) so they can be removed later.
//
// It panics when a type code number does not fit in a uintptr of the target or
// when a typecode global is used in an unexpected constant expression.
func LowerReflect(mod llvm.Module) {
	// if reflect were not used, we could skip generating the sidetable
	// this does not help in practice, and is difficult to do correctly

	// Obtain slice of all types in the program.
	type typeInfo struct {
		typecode llvm.Value
		name     string
		numUses  int
	}
	var types []*typeInfo
	for global := mod.FirstGlobal(); !global.IsNil(); global = llvm.NextGlobal(global) {
		if strings.HasPrefix(global.Name(), "reflect/types.type:") {
			types = append(types, &typeInfo{
				typecode: global,
				name:     global.Name(),
				numUses:  len(getUses(global)),
			})
		}
	}

	// Sort the slice by number of uses (ascending), breaking ties by name
	// (descending). Type codes are assigned in this order.
	// NOTE(review): the intent is for often used types to get a small type
	// code, yet the comparison puts the least used types first. The order is
	// deliberately left untouched here because it determines every assigned
	// type code.
	sort.Slice(types, func(i, j int) bool {
		if types[i].numUses != types[j].numUses {
			return types[i].numUses < types[j].numUses
		}
		// It would make more sense to compare the name in the other direction,
		// but for some reason that increases binary size. Could be a fluke, but
		// could also have some good reason (and possibly hint at a small
		// optimization).
		return types[i].name > types[j].name
	})

	// Determine the size of a uintptr on the target. The target data object
	// is only needed for this query, so create it once and release it when
	// done instead of leaking it.
	targetData := llvm.NewTargetData(mod.DataLayout())
	defer targetData.Dispose()
	uintptrLen := targetData.PointerSize() * 8
	uintptrType := mod.Context().IntType(uintptrLen)

	// Assign typecodes the way the reflect package expects.
	state := typeCodeAssignmentState{
		fallbackIndex:                    1,
		uintptrLen:                       uintptrLen,
		namedBasicTypes:                  make(map[string]int),
		namedNonBasicTypes:               make(map[string]int),
		arrayTypes:                       make(map[string]int),
		structTypes:                      make(map[string]int),
		structNames:                      make(map[string]int),
		needsNamedNonBasicTypesSidetable: len(getUses(mod.NamedGlobal("reflect.namedNonBasicTypesSidetable"))) != 0,
		needsStructTypesSidetable:        len(getUses(mod.NamedGlobal("reflect.structTypesSidetable"))) != 0,
		needsStructNamesSidetable:        len(getUses(mod.NamedGlobal("reflect.structNamesSidetable"))) != 0,
		needsArrayTypesSidetable:         len(getUses(mod.NamedGlobal("reflect.arrayTypesSidetable"))) != 0,
	}
	for _, t := range types {
		num := state.getTypeCodeNum(t.typecode)
		if num.BitLen() > state.uintptrLen || !num.IsUint64() {
			// TODO: support this in some way, using a side table for example.
			// That's less efficient but better than not working at all.
			// Particularly important on systems with 16-bit pointers (e.g.
			// AVR).
			panic("compiler: could not store type code number inside interface type code")
		}

		// The integer constant is the same for every use of this type code.
		typecodeInt := llvm.ConstInt(uintptrType, num.Uint64(), false)

		// Replace each use of the type code global with the constant type code.
		for _, use := range getUses(t.typecode) {
			if use.IsAConstantExpr().IsNil() {
				// Only constant expression uses are rewritten.
				continue
			}
			typecode := typecodeInt
			switch use.Opcode() {
			case llvm.PtrToInt:
				// Already of the correct type.
			case llvm.BitCast:
				// Could happen when stored in an interface (which is of type
				// i8*).
				typecode = llvm.ConstIntToPtr(typecode, use.Type())
			default:
				panic("unexpected constant expression")
			}
			use.ReplaceAllUsesWith(typecode)
		}
	}

	// markSidetable applies the attributes shared by all generated
	// sidetables: internal linkage, unnamed_addr and constant.
	markSidetable := func(global llvm.Value) {
		global.SetLinkage(llvm.InternalLinkage)
		global.SetUnnamedAddr(true)
		global.SetGlobalConstant(true)
	}

	// Only create each sidetable when it is necessary.
	if state.needsNamedNonBasicTypesSidetable {
		markSidetable(replaceGlobalIntWithArray(mod, "reflect.namedNonBasicTypesSidetable", state.namedNonBasicTypesSidetable))
	}
	if state.needsArrayTypesSidetable {
		markSidetable(replaceGlobalIntWithArray(mod, "reflect.arrayTypesSidetable", state.arrayTypesSidetable))
	}
	if state.needsStructTypesSidetable {
		markSidetable(replaceGlobalIntWithArray(mod, "reflect.structTypesSidetable", state.structTypesSidetable))
	}
	if state.needsStructNamesSidetable {
		markSidetable(replaceGlobalIntWithArray(mod, "reflect.structNamesSidetable", state.structNamesSidetable))
	}

	// Remove most objects created for interface and reflect lowering.
	// They would normally be removed anyway in later passes, but not always.
	// It also cleans up the IR for testing.
	for _, typ := range types {
		initializer := typ.typecode.Initializer()
		// Read the 'references' field before the initializer is cleared.
		references := llvm.ConstExtractValue(initializer, []uint32{0})
		typ.typecode.SetInitializer(llvm.ConstNull(initializer.Type()))
		if strings.HasPrefix(typ.name, "reflect/types.type:struct:") {
			// Structs have a 'references' field that is not a typecode but
			// a pointer to a runtime.structField array and therefore a
			// bitcast. This global should be erased separately, otherwise
			// typecode objects cannot be erased.
			structFields := references.Operand(0)
			structFields.EraseFromParentAsGlobal()
		}
	}
}

// getTypeCodeNum computes the type code number for the given typecode global,
// in the encoding the reflect package expects. Also see getTypeCodeName, which
// serializes types to a string based on a types.Type value for this function.
//
// The result is returned as a big.Int because prefix-style types can nest
// arbitrarily deep; callers check whether it fits in a uintptr.
func (state *typeCodeAssignmentState) getTypeCodeNum(typecode llvm.Value) *big.Int {
	// Note: see src/reflect/type.go for bit allocations.
	class, value := getClassAndValueFromTypeCode(typecode)

	// For a named type, remember the name and continue with the type code it
	// references (its underlying type).
	var typeName string
	if class == "named" {
		typeName = value
		typecode = llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
		class, value = getClassAndValueFromTypeCode(typecode)
	}
	isNamed := typeName != ""

	if class == "basic" {
		// Basic types follow the following bit pattern:
		//    ...xxxxx0
		// where xxxxx is allocated for the 18 possible basic types and all the
		// upper bits are used to indicate the named type.
		kind, known := basicTypes[value]
		if !known {
			panic("invalid basic type: " + value)
		}
		if isNamed {
			// This type is named, set the upper bits to the name ID.
			kind |= int64(state.getBasicNamedTypeNum(typeName)) << 5
		}
		return big.NewInt(kind << 1)
	}

	// Non-basic types use the following bit pattern:
	//    ...nxxx1
	// where xxx indicates the non-basic type. The upper bits contain
	// whatever the type contains. Types that wrap a single other type
	// (channel, interface, pointer, slice) just contain the bits of the
	// wrapped type. Other types (like struct) need more fields and thus
	// cannot be encoded as a simple prefix.
	classNumber, known := nonBasicTypes[class]
	if !known {
		panic("unknown type kind: " + class)
	}
	lowBits := classNumber<<1 | 1 // the 5 low bits of the typecode

	var upper *big.Int
	if !isNamed {
		upper = state.getNonBasicTypeCode(class, typecode)
	} else {
		lowBits |= 1 << 4 // set the 'n' bit (see above)
		if index, seen := state.namedNonBasicTypes[typeName]; seen {
			// This named type has been defined before: reuse its number.
			upper = big.NewInt(int64(index))
		} else if !state.needsNamedNonBasicTypesSidetable {
			// Use simple small integers in this case, to make these numbers
			// smaller.
			index := len(state.namedNonBasicTypes) + 1
			state.namedNonBasicTypes[typeName] = index
			upper = big.NewInt(int64(index))
		} else {
			// We need to store full type information.
			// First reserve a slot in the named non-basic type sidetable and
			// register it, so that a recursive reference to this type (for
			// example a linked list: a pointer to a struct that contains
			// that same pointer) finds the slot instead of recursing
			// forever.
			index := len(state.namedNonBasicTypesSidetable)
			state.namedNonBasicTypesSidetable = append(state.namedNonBasicTypesSidetable, 0)
			state.namedNonBasicTypes[typeName] = index

			// Get the typecode of the underlying type (which could be the
			// element type in the case of pointers, for example).
			underlying := state.getNonBasicTypeCode(class, typecode)
			if underlying.BitLen() > state.uintptrLen || !underlying.IsUint64() {
				panic("cannot store value in sidetable")
			}

			// Now fill in the reserved slot with the number we just
			// determined.
			state.namedNonBasicTypesSidetable[index] = underlying.Uint64()
			upper = big.NewInt(int64(index))
		}
	}

	// Concatenate the upper bits and 'lowBits' bitstrings.
	return upper.Lsh(upper, 5).Or(upper, big.NewInt(lowBits))
}

// getNonBasicTypeCode is a helper for getTypeCodeNum. It produces the upper
// bits of the type code of a non-basic type of the given class; the caller
// adds the low bits that identify the class itself.
func (state *typeCodeAssignmentState) getNonBasicTypeCode(class string, typecode llvm.Value) *big.Int {
	switch class {
	case "array":
		// An array is basically a pair of (typecode, length) stored in a
		// sidetable.
		arrayNum := state.getArrayTypeNum(typecode)
		return big.NewInt(int64(arrayNum))
	case "struct":
		// More complicated type kind. The upper bits contain the index to the
		// struct type in the struct types sidetable.
		structNum := state.getStructTypeNum(typecode)
		return big.NewInt(int64(structNum))
	case "chan", "pointer", "slice":
		// Prefix-style type kinds. The upper bits contain the element type.
		elem := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
		return state.getTypeCodeNum(elem)
	}

	// Type has not yet been implemented, so fall back by using a unique
	// number.
	fallback := big.NewInt(int64(state.fallbackIndex))
	state.fallbackIndex++
	return fallback
}

// getClassAndValueFromTypeCode takes a typecode (a llvm.Value of type
// runtime.typecodeID), looks at the name, and extracts the typecode class and
// value from it. For example, for a typecode with the following name:
//     reflect/types.type:pointer:named:reflect.ValueError
// It extracts:
//     class = "pointer"
//     value = "named:reflect.ValueError"
//
// The class is everything between the "reflect/types.type:" prefix and the
// next colon; the value is everything after that colon. The function panics
// with a descriptive message when the name lacks the prefix or the colon
// separating class and value.
func getClassAndValueFromTypeCode(typecode llvm.Value) (class, value string) {
	typecodeName := typecode.Name()
	const prefix = "reflect/types.type:"
	if !strings.HasPrefix(typecodeName, prefix) {
		panic("unexpected typecode name: " + typecodeName)
	}
	id := typecodeName[len(prefix):]
	sep := strings.IndexByte(id, ':')
	if sep < 0 {
		// Without this check, slicing with -1 would crash with an opaque
		// "slice bounds out of range" runtime error.
		panic("unexpected typecode name: " + typecodeName)
	}
	return id[:sep], id[sep+1:]
}

// getBasicNamedTypeNum looks up the unique number of the given named basic
// type, allocating the next free number when the name is seen for the first
// time. Numbers start at 1, so the result is never zero.
func (state *typeCodeAssignmentState) getBasicNamedTypeNum(name string) int {
	id, known := state.namedBasicTypes[name]
	if !known {
		// First occurrence: hand out the next number in sequence.
		id = len(state.namedBasicTypes) + 1
		state.namedBasicTypes[name] = id
	}
	return id
}

// getArrayTypeNum returns the number of the given array type. When
// reflect.arrayTypesSidetable is in use this is the byte offset of the array's
// entry in that sidetable; otherwise it is just a unique number for this type.
// Each array type (identified by the name of its typecode global) is only
// registered once.
func (state *typeCodeAssignmentState) getArrayTypeNum(typecode llvm.Value) int {
	key := typecode.Name()
	if existing, found := state.arrayTypes[key]; found {
		// This array type was registered before, so don't store it twice.
		return existing
	}

	if !state.needsArrayTypesSidetable {
		// No sidetable is needed: monotonically increasing numbers are
		// enough to tell array types apart.
		next := len(state.arrayTypes)
		state.arrayTypes[key] = next
		return next
	}

	// Resolve the element type first. This may itself add entries to the
	// sidetable, so the offset of this array is determined afterwards.
	elemNum := state.getTypeCodeNum(llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}))
	if elemNum.BitLen() > state.uintptrLen || !elemNum.IsUint64() {
		// TODO: make this a regular error
		panic("array element type has a type code that is too big")
	}

	// The array side table is a sequence of {element type, array length},
	// both encoded as varints.
	length := llvm.ConstExtractValue(typecode.Initializer(), []uint32{1}).ZExtValue()
	entry := append(makeVarint(elemNum.Uint64()), makeVarint(length)...)

	offset := len(state.arrayTypesSidetable)
	state.arrayTypes[key] = offset
	state.arrayTypesSidetable = append(state.arrayTypesSidetable, entry...)
	return offset
}

// getStructTypeNum returns the number of the given struct type. When
// reflect.structTypesSidetable is in use this is the byte offset of the
// struct's entry in that sidetable; otherwise it is just a unique number for
// every struct. Each struct type (identified by the name of its typecode
// global) is only registered once.
//
// A sidetable entry consists of the number of fields (varint) followed, for
// each field, by: a flags byte, the field's type code (varint), the field name
// number (varint) and, if the field has a tag, the tag number (varint).
func (state *typeCodeAssignmentState) getStructTypeNum(typecode llvm.Value) int {
	// Bits of the per-field flags byte.
	const (
		flagEmbedded byte = 1 << iota
		flagHasTag
		flagExported
	)

	key := typecode.Name()
	if existing, found := state.structTypes[key]; found {
		// This struct already has an assigned type code.
		return existing
	}

	if !state.needsStructTypesSidetable {
		// No sidetable is needed: monotonically increasing numbers are
		// enough to tell struct types apart.
		next := len(state.structTypes)
		state.structTypes[key] = next
		return next
	}

	// Get the fields this struct type contains: the typecode references a
	// global holding an array with one element per field.
	fieldsArray := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}).Operand(0).Initializer()
	fieldCount := fieldsArray.Type().ArrayLength()

	// The entry starts with the number of fields. This is usually just a
	// single byte because most structs don't contain that many fields, but
	// make it a varint just to be sure.
	entry := makeVarint(uint64(fieldCount))

	// Every field is stored sequentially in the entry. Fields can be
	// retrieved at runtime by iterating over all of them until the right
	// field has been found. Perhaps adding some index would speed things up,
	// but it would also make the sidetable bigger.
	for idx := 0; idx < fieldCount; idx++ {
		field := llvm.ConstExtractValue(fieldsArray, []uint32{uint32(idx)})
		var flags byte

		// Field name: mandatory, stored in the names sidetable.
		namePtr := llvm.ConstExtractValue(field, []uint32{1})
		if namePtr == llvm.ConstPointerNull(namePtr.Type()) {
			panic("compiler: no name for this struct field")
		}
		nameBytes := getGlobalBytes(namePtr.Operand(0))
		nameNum := state.getStructNameNumber(nameBytes)
		if ast.IsExported(string(nameBytes)) {
			flags |= flagExported
		}

		// Field tag: optional, stored in the same names sidetable.
		tagNum := 0
		tagPtr := llvm.ConstExtractValue(field, []uint32{2})
		tagged := tagPtr != llvm.ConstPointerNull(tagPtr.Type())
		if tagged {
			tagNum = state.getStructNameNumber(getGlobalBytes(tagPtr.Operand(0)))
			flags |= flagHasTag
		}

		// The 'embedded' or 'anonymous' flag for this field.
		if llvm.ConstExtractValue(field, []uint32{3}).ZExtValue() != 0 {
			flags |= flagEmbedded
		}

		// Each field starts with a flags byte holding the embedded, has-tag
		// and exported bits.
		entry = append(entry, flags)

		// All fields have a type, so include its type code directly here.
		fieldType := state.getTypeCodeNum(llvm.ConstExtractValue(field, []uint32{0}))
		if fieldType.BitLen() > state.uintptrLen || !fieldType.IsUint64() {
			// TODO: make this a regular error
			panic("struct field has a type code that is too big")
		}
		entry = append(entry, makeVarint(fieldType.Uint64())...)

		// Then the name, and the tag if there is one.
		entry = append(entry, makeVarint(uint64(nameNum))...)
		if tagged {
			entry = append(entry, makeVarint(uint64(tagNum))...)
		}
	}

	// The struct number is the start offset of this entry in the sidetable.
	// It is determined only now because resolving field types above may have
	// added other entries to the sidetable.
	offset := len(state.structTypesSidetable)
	state.structTypes[key] = offset
	state.structTypesSidetable = append(state.structTypesSidetable, entry...)
	return offset
}

// getStructNameNumber stores the given string (a field name or tag) in the
// struct names sidetable and returns the byte offset at which it was stored.
// The format is a varint of the length of the string, followed by the raw
// bytes of the string. A string that was stored before is not stored again;
// its existing offset is returned instead, for space efficiency.
func (state *typeCodeAssignmentState) getStructNameNumber(nameBytes []byte) int {
	key := string(nameBytes)
	offset, seen := state.structNames[key]
	if seen {
		// This string was used before, re-use its entry.
		return offset
	}

	// Not in the names sidetable yet: append a length-prefixed entry.
	offset = len(state.structNamesSidetable)
	state.structNames[key] = offset
	entry := append(makeVarint(uint64(len(nameBytes))), nameBytes...)
	state.structNamesSidetable = append(state.structNamesSidetable, entry...)
	return offset
}

// makeVarint is a small helper function that encodes the number as an
// unsigned varint (see binary.PutUvarint) and returns just the bytes that
// were needed for the encoding.
func makeVarint(n uint64) []byte {
	var scratch [binary.MaxVarintLen64]byte
	size := binary.PutUvarint(scratch[:], n)
	return scratch[:size]
}