wasm: fix GC scanning of allocas
Scanning of allocas was entirely broken on WebAssembly: the code intended to do this was never run, and there were no tests for it. Looking into this further, I found that tracking allocas explicitly is not actually necessary: the C stack can be scanned conservatively, and in fact this was already being done for goroutine stacks (because they live on the heap and are always referenced). It was not being done for the system stack, however. With these fixes, I believe the code should be both faster *and* more correct. I found this during my work to get opaque pointers supported in LLVM 15, because the code that was previously never reached finally got run and turned out to be quite buggy.
Этот коммит содержится в:
родитель
6b46ae261a
коммит
65d65c1313
7 изменённых файлов: 57 добавлений и 54 удалений
|
@ -58,6 +58,8 @@ var (
|
|||
|
||||
globalsStart = uintptr(unsafe.Pointer(&globalsStartSymbol))
|
||||
globalsEnd = uintptr(unsafe.Pointer(&heapStartSymbol))
|
||||
|
||||
stackTop = uintptr(unsafe.Pointer(&globalsStartSymbol))
|
||||
)
|
||||
|
||||
func align(ptr uintptr) uintptr {
|
||||
|
@ -67,6 +69,7 @@ func align(ptr uintptr) uintptr {
|
|||
return (ptr + heapAlign - 1) &^ (heapAlign - 1)
|
||||
}
|
||||
|
||||
//export tinygo_getCurrentStackPointer
|
||||
func getCurrentStackPointer() uintptr
|
||||
|
||||
// growHeap tries to grow the heap size. It returns true if it succeeds, false
|
||||
|
|
10
src/runtime/asm_tinygowasm.S
Обычный файл
10
src/runtime/asm_tinygowasm.S
Обычный файл
|
@ -0,0 +1,10 @@
|
|||
.globaltype __stack_pointer, i32
|
||||
|
||||
.global tinygo_getCurrentStackPointer
|
||||
.hidden tinygo_getCurrentStackPointer
|
||||
.type tinygo_getCurrentStackPointer,@function
|
||||
tinygo_getCurrentStackPointer: // func getCurrentStackPointer() uintptr
|
||||
.functype tinygo_getCurrentStackPointer() -> (i32)
|
||||
global.get __stack_pointer
|
||||
return
|
||||
end_function
|
|
@ -4,6 +4,7 @@
|
|||
package runtime
|
||||
|
||||
import (
|
||||
"internal/task"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
|
@ -17,19 +18,28 @@ type stackChainObject struct {
|
|||
|
||||
// markStack marks all root pointers found on the stack.
|
||||
//
|
||||
// This implementation is conservative and relies on the compiler inserting code
|
||||
// to manually push/pop stack objects that are stored in a linked list starting
|
||||
// with stackChainStart. Manually keeping track of stack values is _much_ more
|
||||
// expensive than letting the compiler do it and it inhibits a few important
|
||||
// optimizations, but it has the big advantage of being portable to basically
|
||||
// any ISA, including WebAssembly.
|
||||
// - Goroutine stacks are heap allocated and always reachable in some way
|
||||
// (for example through internal/task.currentTask) so they will always be
|
||||
// scanned.
|
||||
// - The system stack (aka startup stack) is not heap allocated, so even
|
||||
// though it may be referenced it will not be scanned by default.
|
||||
//
|
||||
// Therefore, we only need to scan the system stack.
|
||||
// It is relatively easy to scan the system stack while we're on it: we can
|
||||
// simply read __stack_pointer and __global_base and scan the area in between.
|
||||
// Unfortunately, it's hard to get the system stack pointer while we're on a
|
||||
// goroutine stack. But when we're on a goroutine stack, the system stack is in
|
||||
// the scheduler which means there shouldn't be anything on the system stack
|
||||
// anyway.
|
||||
// ...I hope this assumption holds, otherwise we will need to store the system
|
||||
// stack in a global or something.
|
||||
//
|
||||
// The compiler also inserts code to store all globals in a chain via
|
||||
// stackChainStart. Luckily we don't need to scan these, as these globals are
|
||||
// stored on the goroutine stack and are therefore already getting scanned.
|
||||
func markStack() {
|
||||
stackObject := stackChainStart
|
||||
for stackObject != nil {
|
||||
start := uintptr(unsafe.Pointer(stackObject)) + unsafe.Sizeof(uintptr(0))*2
|
||||
end := start + stackObject.numSlots*unsafe.Alignof(uintptr(0))
|
||||
markRoots(start, end)
|
||||
stackObject = stackObject.parent
|
||||
if task.OnSystemStack() {
|
||||
markRoots(getCurrentStackPointer(), stackTop)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
"--stack-first",
|
||||
"--no-demangle"
|
||||
],
|
||||
"extra-files": [
|
||||
"src/runtime/asm_tinygowasm.S"
|
||||
],
|
||||
"emulator": "wasmtime {}",
|
||||
"wasm-abi": "generic"
|
||||
}
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
"--stack-first",
|
||||
"--no-demangle"
|
||||
],
|
||||
"extra-files": [
|
||||
"src/runtime/asm_tinygowasm.S"
|
||||
],
|
||||
"emulator": "node {root}/targets/wasm_exec.js {}",
|
||||
"wasm-abi": "js"
|
||||
}
|
||||
|
|
|
@ -139,7 +139,7 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
|||
}
|
||||
|
||||
// Determine what to do with each call.
|
||||
var allocas, pointers []llvm.Value
|
||||
var pointers []llvm.Value
|
||||
for _, call := range calls {
|
||||
ptr := call.Operand(0)
|
||||
call.EraseFromParentAsInstruction()
|
||||
|
@ -189,16 +189,15 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
|||
// be optimized if needed.
|
||||
}
|
||||
|
||||
if !ptr.IsAAllocaInst().IsNil() {
|
||||
if typeHasPointers(ptr.Type().ElementType()) {
|
||||
allocas = append(allocas, ptr)
|
||||
}
|
||||
} else {
|
||||
pointers = append(pointers, ptr)
|
||||
if ptr := stripPointerCasts(ptr); !ptr.IsAAllocaInst().IsNil() {
|
||||
// Allocas don't need to be tracked because they are allocated
|
||||
// on the C stack which is scanned separately.
|
||||
continue
|
||||
}
|
||||
pointers = append(pointers, ptr)
|
||||
}
|
||||
|
||||
if len(allocas) == 0 && len(pointers) == 0 {
|
||||
if len(pointers) == 0 {
|
||||
// This function does not need to keep track of stack pointers.
|
||||
continue
|
||||
}
|
||||
|
@ -208,9 +207,6 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
|||
stackChainStartType, // Pointer to parent frame.
|
||||
uintptrType, // Number of elements in this frame.
|
||||
}
|
||||
for _, alloca := range allocas {
|
||||
fields = append(fields, alloca.Type().ElementType())
|
||||
}
|
||||
for _, ptr := range pointers {
|
||||
fields = append(fields, ptr.Type())
|
||||
}
|
||||
|
@ -235,16 +231,6 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
|||
stackObjectCast := builder.CreateBitCast(stackObject, stackChainStartType, "")
|
||||
builder.CreateStore(stackObjectCast, stackChainStart)
|
||||
|
||||
// Replace all independent allocas with GEPs in the stack object.
|
||||
for i, alloca := range allocas {
|
||||
gep := builder.CreateGEP(stackObject, []llvm.Value{
|
||||
llvm.ConstInt(ctx.Int32Type(), 0, false),
|
||||
llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false),
|
||||
}, "")
|
||||
alloca.ReplaceAllUsesWith(gep)
|
||||
alloca.EraseFromParentAsInstruction()
|
||||
}
|
||||
|
||||
// Do a store to the stack object after each new pointer that is created.
|
||||
pointerStores := make(map[llvm.Value]struct{})
|
||||
for i, ptr := range pointers {
|
||||
|
@ -260,7 +246,7 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
|||
// Extract a pointer to the appropriate section of the stack object.
|
||||
gep := builder.CreateGEP(stackObject, []llvm.Value{
|
||||
llvm.ConstInt(ctx.Int32Type(), 0, false),
|
||||
llvm.ConstInt(ctx.Int32Type(), uint64(2+len(allocas)+i), false),
|
||||
llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false),
|
||||
}, "")
|
||||
|
||||
// Store the pointer into the stack slot.
|
||||
|
|
|
@ -75,26 +75,14 @@ func replaceGlobalIntWithArray(mod llvm.Module, name string, buf interface{}) ll
|
|||
return global
|
||||
}
|
||||
|
||||
// typeHasPointers returns whether this type is a pointer or contains pointers.
|
||||
// If the type is an aggregate type, it will check whether there is a pointer
|
||||
// inside.
|
||||
func typeHasPointers(t llvm.Type) bool {
|
||||
switch t.TypeKind() {
|
||||
case llvm.PointerTypeKind:
|
||||
return true
|
||||
case llvm.StructTypeKind:
|
||||
for _, subType := range t.StructElementTypes() {
|
||||
if typeHasPointers(subType) {
|
||||
return true
|
||||
}
|
||||
// stripPointerCasts strips instruction pointer casts (getelementptr and
|
||||
// bitcast) and returns the original value without the casts.
|
||||
func stripPointerCasts(value llvm.Value) llvm.Value {
|
||||
if !value.IsAInstruction().IsNil() {
|
||||
switch value.InstructionOpcode() {
|
||||
case llvm.GetElementPtr, llvm.BitCast:
|
||||
return stripPointerCasts(value.Operand(0))
|
||||
}
|
||||
return false
|
||||
case llvm.ArrayTypeKind:
|
||||
if typeHasPointers(t.ElementType()) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
default:
|
||||
return false
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
|
Загрузка…
Создание таблицы
Сослаться в новой задаче