From 65d65c131345e47581835e76df5fd22fced15158 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 16 Oct 2022 00:44:38 +0200 Subject: [PATCH] wasm: fix GC scanning of allocas Scanning of allocas was entirely broken on WebAssembly. The code intended to do this was never run. There were also no tests. Looking into this further, I found that it is actually not really necessary to do that: the C stack can be scanned conservatively and in fact this was already done for goroutine stacks (because they live on the heap and are always referenced). It wasn't done for the system stack however. With these fixes, I believe code should be both faster *and* more correct. I found this in my work to get opaque pointers supported in LLVM 15, because the code that was never reached now finally got run and was actually quite buggy. --- src/runtime/arch_tinygowasm.go | 3 +++ src/runtime/asm_tinygowasm.S | 10 ++++++++++ src/runtime/gc_stack_portable.go | 34 +++++++++++++++++++++----------- targets/wasi.json | 3 +++ targets/wasm.json | 3 +++ transform/gc.go | 30 ++++++++-------------------- transform/llvm.go | 28 ++++++++------------------ 7 files changed, 57 insertions(+), 54 deletions(-) create mode 100644 src/runtime/asm_tinygowasm.S diff --git a/src/runtime/arch_tinygowasm.go b/src/runtime/arch_tinygowasm.go index 597b2489..f1893d91 100644 --- a/src/runtime/arch_tinygowasm.go +++ b/src/runtime/arch_tinygowasm.go @@ -58,6 +58,8 @@ var ( globalsStart = uintptr(unsafe.Pointer(&globalsStartSymbol)) globalsEnd = uintptr(unsafe.Pointer(&heapStartSymbol)) + + stackTop = uintptr(unsafe.Pointer(&globalsStartSymbol)) ) func align(ptr uintptr) uintptr { @@ -67,6 +69,7 @@ func align(ptr uintptr) uintptr { return (ptr + heapAlign - 1) &^ (heapAlign - 1) } +//export tinygo_getCurrentStackPointer func getCurrentStackPointer() uintptr // growHeap tries to grow the heap size. 
It returns true if it succeeds, false diff --git a/src/runtime/asm_tinygowasm.S b/src/runtime/asm_tinygowasm.S new file mode 100644 index 00000000..3278a7f6 --- /dev/null +++ b/src/runtime/asm_tinygowasm.S @@ -0,0 +1,10 @@ +.globaltype __stack_pointer, i32 + +.global tinygo_getCurrentStackPointer +.hidden tinygo_getCurrentStackPointer +.type tinygo_getCurrentStackPointer,@function +tinygo_getCurrentStackPointer: // func getCurrentStackPointer() uintptr + .functype tinygo_getCurrentStackPointer() -> (i32) + global.get __stack_pointer + return + end_function diff --git a/src/runtime/gc_stack_portable.go b/src/runtime/gc_stack_portable.go index 6ffc35d4..6802de02 100644 --- a/src/runtime/gc_stack_portable.go +++ b/src/runtime/gc_stack_portable.go @@ -4,6 +4,7 @@ package runtime import ( + "internal/task" "unsafe" ) @@ -17,19 +18,28 @@ type stackChainObject struct { // markStack marks all root pointers found on the stack. // -// This implementation is conservative and relies on the compiler inserting code -// to manually push/pop stack objects that are stored in a linked list starting -// with stackChainStart. Manually keeping track of stack values is _much_ more -// expensive than letting the compiler do it and it inhibits a few important -// optimizations, but it has the big advantage of being portable to basically -// any ISA, including WebAssembly. +// - Goroutine stacks are heap allocated and always reachable in some way +// (for example through internal/task.currentTask) so they will always be +// scanned. +// - The system stack (aka startup stack) is not heap allocated, so even +// though it may be referenced it will not be scanned by default. +// +// Therefore, we only need to scan the system stack. +// It is relatively easy to scan the system stack while we're on it: we can +// simply read __stack_pointer and __global_base and scan the area in between. +// Unfortunately, it's hard to get the system stack pointer while we're on a +// goroutine stack.
But when we're on a goroutine stack, the system stack is in +// the scheduler which means there shouldn't be anything on the system stack +// anyway. +// ...I hope this assumption holds, otherwise we will need to store the system +// stack in a global or something. +// +// The compiler also inserts code to store all globals in a chain via +// stackChainStart. Luckily we don't need to scan these, as these globals are +// stored on the goroutine stack and are therefore already getting scanned. func markStack() { - stackObject := stackChainStart - for stackObject != nil { - start := uintptr(unsafe.Pointer(stackObject)) + unsafe.Sizeof(uintptr(0))*2 - end := start + stackObject.numSlots*unsafe.Alignof(uintptr(0)) - markRoots(start, end) - stackObject = stackObject.parent + if task.OnSystemStack() { + markRoots(getCurrentStackPointer(), stackTop) } } diff --git a/targets/wasi.json b/targets/wasi.json index e710b4bb..6cec6be4 100644 --- a/targets/wasi.json +++ b/targets/wasi.json @@ -18,6 +18,9 @@ "--stack-first", "--no-demangle" ], + "extra-files": [ + "src/runtime/asm_tinygowasm.S" + ], "emulator": "wasmtime {}", "wasm-abi": "generic" } diff --git a/targets/wasm.json b/targets/wasm.json index 2bcada5f..26494cc4 100644 --- a/targets/wasm.json +++ b/targets/wasm.json @@ -19,6 +19,9 @@ "--stack-first", "--no-demangle" ], + "extra-files": [ + "src/runtime/asm_tinygowasm.S" + ], "emulator": "node {root}/targets/wasm_exec.js {}", "wasm-abi": "js" } diff --git a/transform/gc.go b/transform/gc.go index eb3520aa..87dc6e88 100644 --- a/transform/gc.go +++ b/transform/gc.go @@ -139,7 +139,7 @@ func MakeGCStackSlots(mod llvm.Module) bool { } // Determine what to do with each call. - var allocas, pointers []llvm.Value + var pointers []llvm.Value for _, call := range calls { ptr := call.Operand(0) call.EraseFromParentAsInstruction() @@ -189,16 +189,15 @@ func MakeGCStackSlots(mod llvm.Module) bool { // be optimized if needed. 
} - if !ptr.IsAAllocaInst().IsNil() { - if typeHasPointers(ptr.Type().ElementType()) { - allocas = append(allocas, ptr) - } - } else { - pointers = append(pointers, ptr) + if ptr := stripPointerCasts(ptr); !ptr.IsAAllocaInst().IsNil() { + // Allocas don't need to be tracked because they are allocated + // on the C stack which is scanned separately. + continue } + pointers = append(pointers, ptr) } - if len(allocas) == 0 && len(pointers) == 0 { + if len(pointers) == 0 { // This function does not need to keep track of stack pointers. continue } @@ -208,9 +207,6 @@ func MakeGCStackSlots(mod llvm.Module) bool { stackChainStartType, // Pointer to parent frame. uintptrType, // Number of elements in this frame. } - for _, alloca := range allocas { - fields = append(fields, alloca.Type().ElementType()) - } for _, ptr := range pointers { fields = append(fields, ptr.Type()) } @@ -235,16 +231,6 @@ func MakeGCStackSlots(mod llvm.Module) bool { stackObjectCast := builder.CreateBitCast(stackObject, stackChainStartType, "") builder.CreateStore(stackObjectCast, stackChainStart) - // Replace all independent allocas with GEPs in the stack object. - for i, alloca := range allocas { - gep := builder.CreateGEP(stackObject, []llvm.Value{ - llvm.ConstInt(ctx.Int32Type(), 0, false), - llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false), - }, "") - alloca.ReplaceAllUsesWith(gep) - alloca.EraseFromParentAsInstruction() - } - // Do a store to the stack object after each new pointer that is created. pointerStores := make(map[llvm.Value]struct{}) for i, ptr := range pointers { @@ -260,7 +246,7 @@ func MakeGCStackSlots(mod llvm.Module) bool { // Extract a pointer to the appropriate section of the stack object. gep := builder.CreateGEP(stackObject, []llvm.Value{ llvm.ConstInt(ctx.Int32Type(), 0, false), - llvm.ConstInt(ctx.Int32Type(), uint64(2+len(allocas)+i), false), + llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false), }, "") // Store the pointer into the stack slot. 
diff --git a/transform/llvm.go b/transform/llvm.go index 90b7a7c7..32ee9560 100644 --- a/transform/llvm.go +++ b/transform/llvm.go @@ -75,26 +75,14 @@ func replaceGlobalIntWithArray(mod llvm.Module, name string, buf interface{}) ll return global } -// typeHasPointers returns whether this type is a pointer or contains pointers. -// If the type is an aggregate type, it will check whether there is a pointer -// inside. -func typeHasPointers(t llvm.Type) bool { - switch t.TypeKind() { - case llvm.PointerTypeKind: - return true - case llvm.StructTypeKind: - for _, subType := range t.StructElementTypes() { - if typeHasPointers(subType) { - return true - } +// stripPointerCasts strips instruction pointer casts (getelementptr and +// bitcast) and returns the original value without the casts. +func stripPointerCasts(value llvm.Value) llvm.Value { + if !value.IsAInstruction().IsNil() { + switch value.InstructionOpcode() { + case llvm.GetElementPtr, llvm.BitCast: + return stripPointerCasts(value.Operand(0)) } - return false - case llvm.ArrayTypeKind: - if typeHasPointers(t.ElementType()) { - return true - } - return false - default: - return false } + return value }