tinygo/interp/compiler.go

package interp

// This file compiles the LLVM IR to a form that's easy to efficiently
// interpret.

import (
	"strings"

	"tinygo.org/x/go-llvm"
)

// A function is a compiled LLVM function, which means that interpreting it
// avoids most CGo calls necessary. This is done in a separate step so the
// result can be cached.
// Functions are in SSA form, just like the LLVM version of it. The first block
// (blocks[0]) is the entry block.
type function struct {
	llvmFn llvm.Value         // LLVM function this was compiled from
	name   string             // precalculated llvmFn.Name()
	params []llvm.Value       // precalculated llvmFn.Params()
	blocks []*basicBlock      // compiled blocks in LLVM order; left nil for declarations
	locals map[llvm.Value]int // local index of every parameter and instruction
}

// basicBlock represents a LLVM basic block and contains a slice of
// instructions. The last instruction must be a terminator instruction.
type basicBlock struct {
	// instructions holds the compiled instructions in the order in which
	// they appear in the LLVM basic block. Calls that compileFunction skips
	// (llvm.dbg.value and llvm.lifetime.* intrinsics) are not included.
	instructions []instruction
}

// instruction is a precompiled LLVM IR instruction. The operands can be either
// an already known value (such as literalValue or pointerValue) but can also be
// the special localValue, which means that the value is a function parameter or
// is produced by another instruction in the function. In that case, the
// interpreter will replace the operand with that local value.
type instruction struct {
	opcode     llvm.Opcode // LLVM opcode of the instruction
	localIndex int         // index assigned to this instruction in function.locals
	operands   []value     // precompiled operands; the layout depends on the opcode
	llvmInst   llvm.Value  // the LLVM instruction this was compiled from
	name       string      // llvmInst.Name(); only filled in for some opcodes
}

// String returns a nice human-readable version of this instruction: the
// instruction name followed by its operands, separated by spaces. Opcodes that
// have no name in instructionNameMap are printed as "<unknown op>".
func (inst *instruction) String() string {
	operands := make([]string, len(inst.operands))
	for i, op := range inst.operands {
		operands[i] = op.String()
	}

	// instructionNameMap is an array that is only as long as the largest
	// opcode listed in it. Check the bounds before indexing so that an opcode
	// past the end of the table is reported as unknown instead of triggering
	// an index-out-of-range panic.
	name := ""
	if idx := int(inst.opcode); idx >= 0 && idx < len(instructionNameMap) {
		name = instructionNameMap[idx]
	}
	if name == "" {
		name = "<unknown op>"
	}
	return name + " " + strings.Join(operands, " ")
}

// compileFunction converts the given LLVM function into the precompiled form
// used by the interpreter. Operands are resolved up front as far as possible
// so that interpreting the result rarely has to call into LLVM.
//
// For a declaration only the name, parameters and an empty locals map are
// filled in. Otherwise every parameter and every instruction gets an index in
// fn.locals (parameters first) and every basic block is compiled in order.
// Calls to llvm.dbg.value and llvm.lifetime.* are left out of the compiled
// blocks, although they still take up a local index.
func (r *runner) compileFunction(llvmFn llvm.Value) *function {
	fn := &function{llvmFn: llvmFn, locals: make(map[llvm.Value]int)}
	fn.name = llvmFn.Name()
	fn.params = llvmFn.Params()
	if llvmFn.IsDeclaration() {
		// There is no body to compile.
		return fn
	}

	// Parameters occupy the first local slots.
	for index, param := range fn.params {
		fn.locals[param] = index
	}

	// Number all blocks up front, so that branches to blocks that have not
	// been compiled yet can already refer to them by index.
	blockIndices := make(map[llvm.Value]int)
	for block := llvmFn.FirstBasicBlock(); !block.IsNil(); block = llvm.NextBasicBlock(block) {
		number := len(blockIndices)
		blockIndices[block.AsValue()] = number
	}

	// blockOperand wraps the index of the given basic block in an operand.
	blockOperand := func(block llvm.Value) value {
		return literalValue{uint32(blockIndices[block])}
	}

	// aggregateOffset walks the constant indices of an extractvalue or
	// insertvalue instruction, starting at the aggregate type, and returns
	// the byte offset of the element that is addressed.
	aggregateOffset := func(aggType llvm.Type, aggInst llvm.Value) uint64 {
		var offset uint64
		for _, index := range aggInst.Indices() {
			if aggType.TypeKind() == llvm.StructTypeKind {
				offset += r.targetData.ElementOffset(aggType, int(index))
				aggType = aggType.StructElementTypes()[index]
			} else {
				// Everything else is indexed like an array.
				aggType = aggType.ElementType()
				offset += r.targetData.TypeAllocSize(aggType) * uint64(index)
			}
		}
		return offset
	}

	for block := llvmFn.FirstBasicBlock(); !block.IsNil(); block = llvm.NextBasicBlock(block) {
		bb := &basicBlock{}
		fn.blocks = append(fn.blocks, bb)

		for llvmInst := block.FirstInstruction(); !llvmInst.IsNil(); llvmInst = llvm.NextInstruction(llvmInst) {
			// Every instruction gets the next free local index, whether or
			// not it produces a value.
			localIndex := len(fn.locals)
			fn.locals[llvmInst] = localIndex
			inst := instruction{
				opcode:     llvmInst.InstructionOpcode(),
				localIndex: localIndex,
				llvmInst:   llvmInst,
			}

			// Fill in the operands, whose layout depends on the opcode.
			switch inst.opcode {
			case llvm.Ret:
				// ret: [] for `ret void`, [value] otherwise.
				if llvmInst.OperandsCount() != 0 {
					inst.operands = []value{r.getValue(llvmInst.Operand(0))}
				}
			case llvm.Br:
				switch llvmInst.OperandsCount() {
				case 1:
					// Unconditional branch: [target block]
					inst.operands = []value{blockOperand(llvmInst.Operand(0))}
				case 3:
					// Conditional branch: [condition, then block, else block]
					// LLVM stores the 'then' block in operand 2 and the
					// 'else' block in operand 1.
					condition := r.getValue(llvmInst.Operand(0))
					inst.operands = []value{
						condition,
						blockOperand(llvmInst.Operand(2)),
						blockOperand(llvmInst.Operand(1)),
					}
				default:
					panic("unknown number of operands")
				}
			case llvm.Switch:
				// switch: pairs of (value, block). The first pair is the
				// value that is switched on together with the default block.
				for i, n := 0, llvmInst.OperandsCount(); i < n; i += 2 {
					inst.operands = append(inst.operands,
						r.getValue(llvmInst.Operand(i)),
						blockOperand(llvmInst.Operand(i+1)),
					)
				}
			case llvm.PHI:
				// phi: pairs of (incoming block, incoming value).
				inst.name = llvmInst.Name()
				for i, n := 0, llvmInst.IncomingCount(); i < n; i++ {
					from := llvmInst.IncomingBlock(i).AsValue()
					inst.operands = append(inst.operands,
						blockOperand(from),
						r.getValue(llvmInst.IncomingValue(i)),
					)
				}
			case llvm.Select:
				// select: [condition, value if true, value if false], much
				// like a ternary operator.
				inst.name = llvmInst.Name()
				inst.operands = make([]value, 3)
				for i := range inst.operands {
					inst.operands[i] = r.getValue(llvmInst.Operand(i))
				}
			case llvm.Call:
				// call: [callee, arguments...]
				// Runtime intrinsics that the interpreter handles specially
				// (such as runtime.alloc) are compiled like any other call,
				// because the call may still have to be emitted to run at
				// runtime.
				callee := llvmInst.CalledValue()
				if !callee.IsAFunction().IsNil() {
					calleeName := callee.Name()
					if calleeName == "llvm.dbg.value" || strings.HasPrefix(calleeName, "llvm.lifetime.") {
						// Not relevant to the execution of this function,
						// so leave it out of the compiled block.
						continue
					}
				}
				inst.name = llvmInst.Name()
				// The last operand is not copied as an argument; the callee
				// is taken from CalledValue instead.
				argCount := llvmInst.OperandsCount() - 1
				inst.operands = append(inst.operands, r.getValue(callee))
				for i := 0; i < argCount; i++ {
					inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i)))
				}
			case llvm.Load:
				// load: [pointer, size in bytes]
				// The size is included because the interpreter lacks type
				// information.
				inst.name = llvmInst.Name()
				source := r.getValue(llvmInst.Operand(0))
				size := r.targetData.TypeAllocSize(llvmInst.Type())
				inst.operands = []value{source, literalValue{size}}
			case llvm.Store:
				// store: operands 0 and 1 of the LLVM instruction, in order.
				first := r.getValue(llvmInst.Operand(0))
				second := r.getValue(llvmInst.Operand(1))
				inst.operands = []value{first, second}
			case llvm.Alloca:
				// alloca: [total size in bytes]
				// The element count must be a constant 32-bit literal.
				count := r.getValue(llvmInst.Operand(0)).(literalValue).value.(uint32)
				perElement := r.targetData.TypeAllocSize(llvmInst.Type().ElementType())
				inst.operands = []value{literalValue{perElement * uint64(count)}}
			case llvm.GetElementPtr:
				// gep: [source ptr, dest value size, pairs of indices...]
				inst.name = llvmInst.Name()
				base := llvmInst.Operand(0)
				elementType := base.Type().ElementType()
				inst.operands = []value{
					r.getValue(base),
					literalValue{r.targetData.TypeAllocSize(llvmInst.Type().ElementType())},
					r.getValue(llvmInst.Operand(1)),
					literalValue{r.targetData.TypeAllocSize(elementType)},
				}
				for i, n := 2, llvmInst.OperandsCount(); i < n; i++ {
					index := r.getValue(llvmInst.Operand(i))
					if elementType.TypeKind() != llvm.StructTypeKind {
						// Regular index: add index * stride bytes to the
						// pointer.
						elementType = elementType.ElementType()
						stride := r.targetData.TypeAllocSize(elementType)
						inst.operands = append(inst.operands, index, literalValue{stride})
						continue
					}
					// Struct field: the pair is encoded as the byte offset
					// of the field followed by the bitwise complement of the
					// field index. The complement is negative when read as
					// an int64, which tells the interpreter that this is a
					// struct field and that the pair must not be multiplied.
					// The field index is kept because the interpreter needs
					// it when the GEP has to be done at runtime.
					field := index.(literalValue).value.(uint32)
					fieldOffset := r.targetData.ElementOffset(elementType, int(field))
					inst.operands = append(inst.operands, literalValue{fieldOffset}, literalValue{^uint64(field)})
					elementType = elementType.StructElementTypes()[field]
				}
			case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt:
				// Casts take a single operand. Bitcasts are usually used to
				// cast a pointer from one type to another, leaving the
				// pointer itself intact.
				inst.name = llvmInst.Name()
				inst.operands = []value{r.getValue(llvmInst.Operand(0))}
			case llvm.ExtractValue:
				// extractvalue: [agg, byteOffset, byteSize]
				inst.name = llvmInst.Name()
				agg := llvmInst.Operand(0)
				byteOffset := aggregateOffset(agg.Type(), llvmInst)
				byteSize := r.targetData.TypeAllocSize(llvmInst.Type())
				inst.operands = []value{
					r.getValue(agg),
					literalValue{byteOffset},
					literalValue{byteSize},
				}
			case llvm.InsertValue:
				// insertvalue: [agg, elt, byteOffset]
				inst.name = llvmInst.Name()
				agg := llvmInst.Operand(0)
				byteOffset := aggregateOffset(agg.Type(), llvmInst)
				inst.operands = []value{
					r.getValue(agg),
					r.getValue(llvmInst.Operand(1)),
					literalValue{byteOffset},
				}
			case llvm.ICmp:
				// icmp: [lhs, rhs, predicate]
				inst.name = llvmInst.Name()
				lhs := r.getValue(llvmInst.Operand(0))
				rhs := r.getValue(llvmInst.Operand(1))
				inst.operands = []value{lhs, rhs, literalValue{uint8(llvmInst.IntPredicate())}}
			case llvm.FCmp:
				// fcmp: [lhs, rhs, predicate]
				inst.name = llvmInst.Name()
				lhs := r.getValue(llvmInst.Operand(0))
				rhs := r.getValue(llvmInst.Operand(1))
				inst.operands = []value{lhs, rhs, literalValue{uint8(llvmInst.FloatPredicate())}}
			case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor:
				// Integer binary operations: [lhs, rhs]
				inst.name = llvmInst.Name()
				lhs := r.getValue(llvmInst.Operand(0))
				rhs := r.getValue(llvmInst.Operand(1))
				inst.operands = []value{lhs, rhs}
			case llvm.SExt, llvm.ZExt, llvm.Trunc:
				// Integer width changes: [value, bit width of the result]
				inst.name = llvmInst.Name()
				source := r.getValue(llvmInst.Operand(0))
				width := uint64(llvmInst.Type().IntTypeWidth())
				inst.operands = []value{source, literalValue{width}}
			case llvm.SIToFP, llvm.UIToFP:
				// Integer to floating point: [value, bit width of the result]
				inst.name = llvmInst.Name()
				source := r.getValue(llvmInst.Operand(0))
				width := uint64(r.targetData.TypeAllocSize(llvmInst.Type()) * 8)
				inst.operands = []value{source, literalValue{width}}
			default:
				// Unknown instruction. It is kept without operands: the
				// opcode stored in inst makes it detectable, and the error
				// is raised only when the instruction is actually
				// interpreted (so code that is never executed does not
				// trigger it).
			}
			bb.instructions = append(bb.instructions, inst)
		}
	}
	return fn
}

// instructionNameMap maps from instruction opcodes to instruction names. This
// can be useful for debug logging.
//
// It is an array indexed by opcode, so its length is one more than the largest
// opcode listed below. Opcodes inside that range that are not listed map to
// the empty string. Indexing with an opcode beyond that range is out of
// bounds, so lookups with arbitrary opcodes must check the length first.
var instructionNameMap = [...]string{
	// Terminator Instructions
	llvm.Ret:         "ret",
	llvm.Br:          "br",
	llvm.Switch:      "switch",
	llvm.IndirectBr:  "indirectbr",
	llvm.Invoke:      "invoke",
	llvm.Unreachable: "unreachable",

	// Standard Binary Operators
	llvm.Add:  "add",
	llvm.FAdd: "fadd",
	llvm.Sub:  "sub",
	llvm.FSub: "fsub",
	llvm.Mul:  "mul",
	llvm.FMul: "fmul",
	llvm.UDiv: "udiv",
	llvm.SDiv: "sdiv",
	llvm.FDiv: "fdiv",
	llvm.URem: "urem",
	llvm.SRem: "srem",
	llvm.FRem: "frem",

	// Logical Operators
	llvm.Shl:  "shl",
	llvm.LShr: "lshr",
	llvm.AShr: "ashr",
	llvm.And:  "and",
	llvm.Or:   "or",
	llvm.Xor:  "xor",

	// Memory Operators
	llvm.Alloca:        "alloca",
	llvm.Load:          "load",
	llvm.Store:         "store",
	llvm.GetElementPtr: "getelementptr",

	// Cast Operators
	llvm.Trunc:    "trunc",
	llvm.ZExt:     "zext",
	llvm.SExt:     "sext",
	llvm.FPToUI:   "fptoui",
	llvm.FPToSI:   "fptosi",
	llvm.UIToFP:   "uitofp",
	llvm.SIToFP:   "sitofp",
	llvm.FPTrunc:  "fptrunc",
	llvm.FPExt:    "fpext",
	llvm.PtrToInt: "ptrtoint",
	llvm.IntToPtr: "inttoptr",
	llvm.BitCast:  "bitcast",

	// Other Operators
	llvm.ICmp:           "icmp",
	llvm.FCmp:           "fcmp",
	llvm.PHI:            "phi",
	llvm.Call:           "call",
	llvm.Select:         "select",
	llvm.VAArg:          "vaarg",
	llvm.ExtractElement: "extractelement",
	llvm.InsertElement:  "insertelement",
	llvm.ShuffleVector:  "shufflevector",
	llvm.ExtractValue:   "extractvalue",
	llvm.InsertValue:    "insertvalue",
}