interp: use object layout information for LLVM types

This commit uses the memory layout information for heap allocations
(added in the previous commit) to determine the LLVM type of each
allocation, instead of guessing the type from the allocation's contents.
This fixes a bug in which recursive data structures (such as doubly
linked lists) caused a compiler stack overflow due to infinite recursion.

Not all heap allocations have a memory layout yet, but this can be
incrementally fixed in the future. So far, this commit should fix
(almost?) all cases of this stack overflow issue.
Этот коммит содержится в:
Ayke van Laethem 2021-07-15 18:23:35 +02:00 коммит произвёл Ron Evans
родитель 54dd75f7b3
коммит 1869efe954
6 изменённых файлов: 270 добавлений и 12 удалений

Просмотреть файл

@ -17,6 +17,7 @@ func TestInterp(t *testing.T) {
"consteval", "consteval",
"interface", "interface",
"revert", "revert",
"alloc",
} { } {
name := name // make tc local to this closure name := name // make tc local to this closure
t.Run(name, func(t *testing.T) { t.Run(name, func(t *testing.T) {

Просмотреть файл

@ -234,11 +234,15 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
// Get the requested memory size to be allocated. // Get the requested memory size to be allocated.
size := operands[1].Uint() size := operands[1].Uint()
// Get the object layout, if it is available.
llvmLayoutType := r.getLLVMTypeFromLayout(operands[2])
// Create the object. // Create the object.
alloc := object{ alloc := object{
globalName: r.pkgName + "$alloc", globalName: r.pkgName + "$alloc",
buffer: newRawValue(uint32(size)), llvmLayoutType: llvmLayoutType,
size: uint32(size), buffer: newRawValue(uint32(size)),
size: uint32(size),
} }
index := len(r.objects) index := len(r.objects)
r.objects = append(r.objects, alloc) r.objects = append(r.objects, alloc)

Просмотреть файл

@ -18,6 +18,7 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"math" "math"
"math/big"
"strconv" "strconv"
"strings" "strings"
@ -27,17 +28,20 @@ import (
// An object is a memory buffer that may be an already existing global or a // An object is a memory buffer that may be an already existing global or a
// global created with runtime.alloc or the alloca instruction. If llvmGlobal is // global created with runtime.alloc or the alloca instruction. If llvmGlobal is
// set, that's the global for this object, otherwise it needs to be created (if // set, that's the global for this object, otherwise it needs to be created (if
// it is still reachable when the package initializer returns). // it is still reachable when the package initializer returns). The
// llvmLayoutType is not necessarily a complete type: it may need to be
// repeated (for example, for a slice value).
// //
// Objects are copied in a memory view when they are stored to, to provide the // Objects are copied in a memory view when they are stored to, to provide the
// ability to roll back interpreting a function. // ability to roll back interpreting a function.
type object struct { type object struct {
llvmGlobal llvm.Value llvmGlobal llvm.Value
llvmType llvm.Type // must match llvmGlobal.Type() if both are set, may be unset if llvmGlobal is set llvmType llvm.Type // must match llvmGlobal.Type() if both are set, may be unset if llvmGlobal is set
globalName string // name, if not yet created (not guaranteed to be the final name) llvmLayoutType llvm.Type // LLVM type based on runtime.alloc layout parameter, if available
buffer value // buffer with value as given by interp, nil if external globalName string // name, if not yet created (not guaranteed to be the final name)
size uint32 // must match buffer.len(), if available buffer value // buffer with value as given by interp, nil if external
marked uint8 // 0 means unmarked, 1 means external read, 2 means external write size uint32 // must match buffer.len(), if available
marked uint8 // 0 means unmarked, 1 means external read, 2 means external write
} }
// clone() returns a cloned version of this object, for when an object needs to // clone() returns a cloned version of this object, for when an object needs to
@ -541,7 +545,7 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
// runtime.alloc. // runtime.alloc.
// First allocate a new global for this object. // First allocate a new global for this object.
obj := mem.get(v.index()) obj := mem.get(v.index())
if obj.llvmType.IsNil() { if obj.llvmType.IsNil() && obj.llvmLayoutType.IsNil() {
// Create an initializer without knowing the global type. // Create an initializer without knowing the global type.
// This is probably the result of a runtime.alloc call. // This is probably the result of a runtime.alloc call.
initializer, err := obj.buffer.asRawValue(mem.r).rawLLVMValue(mem) initializer, err := obj.buffer.asRawValue(mem.r).rawLLVMValue(mem)
@ -555,7 +559,23 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
obj.llvmGlobal = llvmValue obj.llvmGlobal = llvmValue
mem.put(v.index(), obj) mem.put(v.index(), obj)
} else { } else {
globalType := obj.llvmType.ElementType() // The global type is known, or at least its structure.
var globalType llvm.Type
if !obj.llvmType.IsNil() {
// The exact type is known.
globalType = obj.llvmType.ElementType()
} else { // !obj.llvmLayoutType.IsNil()
// The exact type isn't known, but the object layout is known.
globalType = obj.llvmLayoutType
// The layout may not span the full size of the global because
// of repetition. One example would be make([]string, 5) which
// would be 10 words in size but the layout would only be two
// words (for the string type).
typeSize := mem.r.targetData.TypeAllocSize(globalType)
if typeSize != uint64(obj.size) {
globalType = llvm.ArrayType(globalType, int(uint64(obj.size)/typeSize))
}
}
if checks && mem.r.targetData.TypeAllocSize(globalType) != uint64(obj.size) { if checks && mem.r.targetData.TypeAllocSize(globalType) != uint64(obj.size) {
panic("size of the globalType isn't the same as the object size") panic("size of the globalType isn't the same as the object size")
} }
@ -574,6 +594,11 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
return llvm.Value{}, errors.New("interp: allocated value does not match allocated type") return llvm.Value{}, errors.New("interp: allocated value does not match allocated type")
} }
llvmValue.SetInitializer(initializer) llvmValue.SetInitializer(initializer)
if obj.llvmType.IsNil() {
// The exact type isn't known (only the layout), so use the
// alignment that would normally be expected from runtime.alloc.
llvmValue.SetAlignment(mem.r.maxAlign)
}
} }
// It should be included in r.globals because otherwise markExternal // It should be included in r.globals because otherwise markExternal
@ -1155,3 +1180,108 @@ func (r *runner) getValue(llvmValue llvm.Value) value {
panic("unknown value") panic("unknown value")
} }
} }
// readObjectLayout reads the object layout as it is stored by the compiler. It
// returns the size of the described object in words and a bitmap in which bit
// i is set when word i of the object holds a pointer. If the layout is unknown
// (the layout value is a nil pointer), it returns (0, nil).
//
// The layout comes in one of two forms:
//
//   - Inline, as an odd integer. Bit 0 is a tag that distinguishes the integer
//     from a real pointer, the next 4, 5, or 6 bits (for 16, 32, and 64 bit
//     pointers respectively) hold the object size in words, and all remaining
//     bits hold the bitmap.
//   - As a pointer to an object whose buffer starts with a pointer-sized
//     integer holding the object size in words, followed by the bytes of the
//     bitmap. These bytes are interpreted in big-endian order.
//
// This function panics when the layout value is malformed.
func (r *runner) readObjectLayout(layoutValue value) (uint64, *big.Int) {
	pointerSize := layoutValue.len(r)
	if checks && uint64(pointerSize) != r.targetData.TypeAllocSize(r.i8ptrType) {
		panic("inconsistent pointer size")
	}

	// The object layout can be stored in a global variable, directly as an
	// integer value, or can be nil.
	ptr, err := layoutValue.asPointer(r)
	if errors.Is(err, errIntegerAsPointer) {
		// It's an integer, which means it's a small object or unknown.
		layout := layoutValue.Uint()
		if layout == 0 {
			// Nil pointer, which means the layout is unknown.
			return 0, nil
		}
		if layout%2 != 1 {
			// Sanity check: the least significant bit must be set. This is how
			// the runtime can separate pointers from integers.
			panic("unexpected layout")
		}

		// Determine format of bitfields in the integer.
		pointerBits := uint64(pointerSize * 8)
		var sizeFieldBits uint64
		switch pointerBits {
		case 16:
			sizeFieldBits = 4
		case 32:
			sizeFieldBits = 5
		case 64:
			sizeFieldBits = 6
		default:
			panic("unknown pointer size")
		}

		// Extract fields: skip the tag bit, mask out the size field and treat
		// everything above it as the bitmap.
		objectSizeWords := (layout >> 1) & (1<<sizeFieldBits - 1)
		bitmap := new(big.Int).SetUint64(layout >> (1 + sizeFieldBits))
		return objectSizeWords, bitmap
	}
	if err != nil {
		// Any other error means ptr does not refer to a valid object, so it
		// must not be used to index r.objects below.
		panic("could not read object layout: " + err.Error())
	}

	// Read the object size in words and the bitmap from the global.
	buf := r.objects[ptr.index()].buffer.(rawValue)
	objectSizeWords := rawValue{buf: buf.buf[:r.pointerSize]}.Uint()
	rawByteValues := buf.buf[r.pointerSize:]
	rawBytes := make([]byte, len(rawByteValues))
	for i, v := range rawByteValues {
		// Every element must be a plain byte value; anything larger would not
		// survive the round trip through byte.
		if uint64(byte(v)) != v {
			panic("found pointer in data array?") // sanity check
		}
		rawBytes[i] = byte(v)
	}
	bitmap := new(big.Int).SetBytes(rawBytes)
	return objectSizeWords, bitmap
}
// getLLVMTypeFromLayout derives the 'layout type' from a runtime.alloc layout
// value. The layout type approximates the real type of the allocation: every
// pointer sits at the right offset, but the allocation itself may consist of
// several repetitions of this type (for example the backing buffer of a
// slice). A nil llvm.Type is returned when no layout is available.
func (r *runner) getLLVMTypeFromLayout(layoutValue value) llvm.Type {
	sizeInWords, pointerBitmap := r.readObjectLayout(layoutValue)
	switch {
	case pointerBitmap == nil:
		// The layout is unknown, so there is nothing to derive a type from.
		return llvm.Type{}
	case pointerBitmap.BitLen() == 0:
		// Not a single pointer in this object: describe it as raw bytes.
		// This matters because pointer-free objects may have a lower
		// alignment than pointer-containing ones.
		return r.mod.Context().Int8Type()
	}

	// Walk over the object one word at a time and emit one field per
	// pointer or per non-pointer word.
	ptrBytes := layoutValue.len(r)
	wordBytes := r.targetData.PrefTypeAlignment(r.i8ptrType)
	var members []llvm.Type
	word := 0
	for word < int(sizeInWords) {
		if pointerBitmap.Bit(word) == 0 {
			// Plain data: an integer as wide as one word.
			members = append(members, r.mod.Context().IntType(wordBytes*8))
			word++
			continue
		}
		// Pointer: it may cover more than one word when the pointer is
		// larger than its alignment.
		members = append(members, r.i8ptrType)
		word += int(ptrBytes / uint32(wordBytes))
	}

	// A lone field is used directly, anything else is wrapped in a
	// non-packed struct.
	var layoutType llvm.Type
	if len(members) != 1 {
		layoutType = r.mod.Context().StructType(members, false)
	} else {
		layoutType = members[0]
	}

	if checks {
		expectedBytes := sizeInWords * uint64(wordBytes)
		if r.targetData.TypeAllocSize(layoutType) != expectedBytes {
			panic("unexpected size") // sanity check
		}
	}
	return layoutType
}

53
interp/testdata/alloc.ll предоставленный Обычный файл
Просмотреть файл

@ -0,0 +1,53 @@
; Input for the "alloc" interp test. Pointers are 32 bits wide on this target.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32--wasi"

; Out-of-line object layout, used for @bigobj1 below: a word holding the object
; size in words (62) followed by the pointer bitmap as raw bytes. The bitmap
; bytes are 0x20 00 00 00 00 00 00 01.
@"runtime/gc.layout:62-2000000000000001" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c" \00\00\00\00\00\00\01" }

; One global per test case; @main.init stores the allocated pointer in each.
@pointerFree12 = global i8* null
@pointerFree7 = global i8* null
@pointerFree3 = global i8* null
@pointerFree0 = global i8* null
@layout1 = global i8* null
@layout2 = global i8* null
@layout3 = global i8* null
@layout4 = global i8* null
@bigobj1 = global i8* null

; runtime.alloc(size in bytes, object layout).
declare i8* @runtime.alloc(i32, i8*) unnamed_addr
; Entry point of the test input: it does nothing but call @main.init.
define void @runtime.initAll() unnamed_addr {
  call void @main.init()
  ret void
}
; Allocates one object per test case and stores the result in the matching
; global. With 32-bit pointers an inline layout integer is decoded as: bit 0 is
; the tag (always 1), bits 1-5 are the object size in words, and the remaining
; bits are the pointer bitmap.
define internal void @main.init() unnamed_addr {
  ; Object that's word-aligned.
  ; Layout 3 = 0b11: size 1 word, empty bitmap (no pointers).
  %pointerFree12 = call i8* @runtime.alloc(i32 12, i8* inttoptr (i32 3 to i8*))
  store i8* %pointerFree12, i8** @pointerFree12
  ; Object larger than a word but not word-aligned.
  %pointerFree7 = call i8* @runtime.alloc(i32 7, i8* inttoptr (i32 3 to i8*))
  store i8* %pointerFree7, i8** @pointerFree7
  ; Object smaller than a word (and of course not word-aligned).
  %pointerFree3 = call i8* @runtime.alloc(i32 3, i8* inttoptr (i32 3 to i8*))
  store i8* %pointerFree3, i8** @pointerFree3
  ; Zero-sized object.
  %pointerFree0 = call i8* @runtime.alloc(i32 0, i8* inttoptr (i32 3 to i8*))
  store i8* %pointerFree0, i8** @pointerFree0
  ; Object made out of 3 pointers.
  ; Layout 67 = 0b1000011: size 1 word, bitmap 0b1 (a single pointer), so the
  ; 12-byte allocation holds that layout three times.
  %layout1 = call i8* @runtime.alloc(i32 12, i8* inttoptr (i32 67 to i8*))
  store i8* %layout1, i8** @layout1
  ; Array (or slice) of 5 slices.
  ; Layout 71 = 0b1000111: size 3 words, bitmap 0b1 (pointer in the first word
  ; only), so the 60-byte allocation holds that layout five times.
  %layout2 = call i8* @runtime.alloc(i32 60, i8* inttoptr (i32 71 to i8*))
  store i8* %layout2, i8** @layout2
  ; Oddly shaped object, using all bits in the layout integer.
  ; Layout 2467830261 = 0x931819F5: size 26 words (104 bytes), 26-bit bitmap.
  %layout3 = call i8* @runtime.alloc(i32 104, i8* inttoptr (i32 2467830261 to i8*))
  store i8* %layout3, i8** @layout3
  ; ...repeated.
  ; Same layout, but the allocation is three times as large (312 bytes).
  %layout4 = call i8* @runtime.alloc(i32 312, i8* inttoptr (i32 2467830261 to i8*))
  store i8* %layout4, i8** @layout4
  ; Large object that needs to be stored in a separate global.
  ; The layout global describes 62 words, which is 248 bytes here.
  %bigobj1 = call i8* @runtime.alloc(i32 248, i8* bitcast ({ i32, [8 x i8] }* @"runtime/gc.layout:62-2000000000000001" to i8*))
  store i8* %bigobj1, i8** @bigobj1
  ret void
}

25
interp/testdata/alloc.out.ll предоставленный Обычный файл
Просмотреть файл

@ -0,0 +1,25 @@
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32--wasi"
@pointerFree12 = local_unnamed_addr global i8* getelementptr inbounds ([12 x i8], [12 x i8]* @"main$alloc", i32 0, i32 0)
@pointerFree7 = local_unnamed_addr global i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"main$alloc.1", i32 0, i32 0)
@pointerFree3 = local_unnamed_addr global i8* getelementptr inbounds ([3 x i8], [3 x i8]* @"main$alloc.2", i32 0, i32 0)
@pointerFree0 = local_unnamed_addr global i8* getelementptr inbounds ([0 x i8], [0 x i8]* @"main$alloc.3", i32 0, i32 0)
@layout1 = local_unnamed_addr global i8* bitcast ([3 x i8*]* @"main$alloc.4" to i8*)
@layout2 = local_unnamed_addr global i8* bitcast ([5 x { i8*, i32, i32 }]* @"main$alloc.5" to i8*)
@layout3 = local_unnamed_addr global i8* bitcast ({ i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }* @"main$alloc.6" to i8*)
@layout4 = local_unnamed_addr global i8* bitcast ([3 x { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }]* @"main$alloc.7" to i8*)
@bigobj1 = local_unnamed_addr global i8* bitcast ({ i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }* @"main$alloc.8" to i8*)
@"main$alloc" = internal global [12 x i8] zeroinitializer, align 4
@"main$alloc.1" = internal global [7 x i8] zeroinitializer, align 4
@"main$alloc.2" = internal global [3 x i8] zeroinitializer, align 4
@"main$alloc.3" = internal global [0 x i8] zeroinitializer, align 4
@"main$alloc.4" = internal global [3 x i8*] zeroinitializer, align 4
@"main$alloc.5" = internal global [5 x { i8*, i32, i32 }] zeroinitializer, align 4
@"main$alloc.6" = internal global { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* } zeroinitializer, align 4
@"main$alloc.7" = internal global [3 x { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }] zeroinitializer, align 4
@"main$alloc.8" = internal global { i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } zeroinitializer, align 4
define void @runtime.initAll() unnamed_addr {
ret void
}

45
testdata/init.go предоставленный
Просмотреть файл

@ -44,8 +44,53 @@ var (
uint8SliceDst []uint8 uint8SliceDst []uint8
intSliceSrc = []int16{5, 123, 1024} intSliceSrc = []int16{5, 123, 1024}
intSliceDst []int16 intSliceDst []int16
someList *linkedList
someBigList *bigLinkedList
) )
// linkedList is a node of a doubly linked list. Because nodes refer to each
// other through prev and next, values of this type form a recursive data
// structure.
type linkedList struct {
	prev *linkedList // previous node, nil for the first node
	next *linkedList // next node, nil for the last node
	v    int         // arbitrary value (don't care)
}
// init builds a doubly linked list of four nodes (v = -1, 0, 1, 2) and leaves
// someList pointing at the node that was created last.
func init() {
	someList = &linkedList{v: -1}
	for i := 0; i < 3; i++ {
		// Append a new node behind the current tail and link both ways.
		node := &linkedList{prev: someList, v: i}
		someList.next = node
		someList = node
	}
}
// bigLinkedList is a doubly linked list node like linkedList, with an extra
// array of 100 pointers.
// NOTE(review): presumably the array is there to make the object too large
// for the inline (integer) object layout, so that the layout has to be stored
// in a separate global -- confirm against the compiler.
type bigLinkedList struct {
	prev *bigLinkedList // previous node, nil for the first node
	next *bigLinkedList // next node, nil for the last node
	v    int
	buf  [100]*int
}
// init builds a doubly linked list of four bigLinkedList nodes (v = -1, 0, 1,
// 2) and leaves someBigList pointing at the node that was created last.
func init() {
	// Neighbouring nodes point at each other, which creates circular
	// references between the allocations.
	someBigList = &bigLinkedList{v: -1}
	for i := 0; i < 3; i++ {
		node := &bigLinkedList{prev: someBigList, v: i}
		someBigList.next = node
		someBigList = node
	}
}
func init() { func init() {
uint8SliceDst = make([]uint8, len(uint8SliceSrc)) uint8SliceDst = make([]uint8, len(uint8SliceSrc))
copy(uint8SliceDst, uint8SliceSrc) copy(uint8SliceDst, uint8SliceSrc)