diff --git a/src/runtime/arch_tinygowasm.go b/src/runtime/arch_tinygowasm.go index 597b2489..f1893d91 100644 --- a/src/runtime/arch_tinygowasm.go +++ b/src/runtime/arch_tinygowasm.go @@ -58,6 +58,8 @@ var ( globalsStart = uintptr(unsafe.Pointer(&globalsStartSymbol)) globalsEnd = uintptr(unsafe.Pointer(&heapStartSymbol)) + + stackTop = uintptr(unsafe.Pointer(&globalsStartSymbol)) ) func align(ptr uintptr) uintptr { @@ -67,6 +69,7 @@ func align(ptr uintptr) uintptr { return (ptr + heapAlign - 1) &^ (heapAlign - 1) } +//export tinygo_getCurrentStackPointer func getCurrentStackPointer() uintptr // growHeap tries to grow the heap size. It returns true if it succeeds, false diff --git a/src/runtime/asm_tinygowasm.S b/src/runtime/asm_tinygowasm.S new file mode 100644 index 00000000..3278a7f6 --- /dev/null +++ b/src/runtime/asm_tinygowasm.S @@ -0,0 +1,10 @@ +.globaltype __stack_pointer, i32 + +.global tinygo_getCurrentStackPointer +.hidden tinygo_getCurrentStackPointer +.type tinygo_getCurrentStackPointer,@function +tinygo_getCurrentStackPointer: // func getCurrentStackPointer() uintptr + .functype tinygo_getCurrentStackPointer() -> (i32) + global.get __stack_pointer + return + end_function diff --git a/src/runtime/gc_stack_portable.go b/src/runtime/gc_stack_portable.go index 6ffc35d4..6802de02 100644 --- a/src/runtime/gc_stack_portable.go +++ b/src/runtime/gc_stack_portable.go @@ -4,6 +4,7 @@ package runtime import ( + "internal/task" "unsafe" ) @@ -17,19 +18,28 @@ type stackChainObject struct { // markStack marks all root pointers found on the stack. // -// This implementation is conservative and relies on the compiler inserting code -// to manually push/pop stack objects that are stored in a linked list starting -// with stackChainStart. 
Manually keeping track of stack values is _much_ more -// expensive than letting the compiler do it and it inhibits a few important -// optimizations, but it has the big advantage of being portable to basically -// any ISA, including WebAssembly. +// - Goroutine stacks are heap allocated and always reachable in some way +// (for example through internal/task.currentTask) so they will always be +// scanned. +// - The system stack (aka startup stack) is not heap allocated, so even +// though it may be referenced it will not be scanned by default. +// +// Therefore, we only need to scan the system stack. +// It is relatively easy to scan the system stack while we're on it: we can +// simply read __stack_pointer and __global_base and scan the area in between. +// Unfortunately, it's hard to get the system stack pointer while we're on a +// goroutine stack. But when we're on a goroutine stack, the system stack is in +// the scheduler which means there shouldn't be anything on the system stack +// anyway. +// ...I hope this assumption holds, otherwise we will need to store the system +// stack in a global or something. +// +// The compiler also inserts code to store all globals in a chain via +// stackChainStart. Luckily we don't need to scan these, as these globals are +// stored on the goroutine stack and are therefore already getting scanned. 
func markStack() { - stackObject := stackChainStart - for stackObject != nil { - start := uintptr(unsafe.Pointer(stackObject)) + unsafe.Sizeof(uintptr(0))*2 - end := start + stackObject.numSlots*unsafe.Alignof(uintptr(0)) - markRoots(start, end) - stackObject = stackObject.parent + if task.OnSystemStack() { + markRoots(getCurrentStackPointer(), stackTop) } } diff --git a/targets/wasi.json b/targets/wasi.json index e710b4bb..6cec6be4 100644 --- a/targets/wasi.json +++ b/targets/wasi.json @@ -18,6 +18,9 @@ "--stack-first", "--no-demangle" ], + "extra-files": [ + "src/runtime/asm_tinygowasm.S" + ], "emulator": "wasmtime {}", "wasm-abi": "generic" } diff --git a/targets/wasm.json b/targets/wasm.json index 2bcada5f..26494cc4 100644 --- a/targets/wasm.json +++ b/targets/wasm.json @@ -19,6 +19,9 @@ "--stack-first", "--no-demangle" ], + "extra-files": [ + "src/runtime/asm_tinygowasm.S" + ], "emulator": "node {root}/targets/wasm_exec.js {}", "wasm-abi": "js" } diff --git a/transform/gc.go b/transform/gc.go index eb3520aa..87dc6e88 100644 --- a/transform/gc.go +++ b/transform/gc.go @@ -139,7 +139,7 @@ func MakeGCStackSlots(mod llvm.Module) bool { } // Determine what to do with each call. - var allocas, pointers []llvm.Value + var pointers []llvm.Value for _, call := range calls { ptr := call.Operand(0) call.EraseFromParentAsInstruction() @@ -189,16 +189,15 @@ func MakeGCStackSlots(mod llvm.Module) bool { // be optimized if needed. } - if !ptr.IsAAllocaInst().IsNil() { - if typeHasPointers(ptr.Type().ElementType()) { - allocas = append(allocas, ptr) - } - } else { - pointers = append(pointers, ptr) + if ptr := stripPointerCasts(ptr); !ptr.IsAAllocaInst().IsNil() { + // Allocas don't need to be tracked because they are allocated + // on the C stack which is scanned separately. + continue } + pointers = append(pointers, ptr) } - if len(allocas) == 0 && len(pointers) == 0 { + if len(pointers) == 0 { // This function does not need to keep track of stack pointers. 
continue } @@ -208,9 +207,6 @@ func MakeGCStackSlots(mod llvm.Module) bool { stackChainStartType, // Pointer to parent frame. uintptrType, // Number of elements in this frame. } - for _, alloca := range allocas { - fields = append(fields, alloca.Type().ElementType()) - } for _, ptr := range pointers { fields = append(fields, ptr.Type()) } @@ -235,16 +231,6 @@ func MakeGCStackSlots(mod llvm.Module) bool { stackObjectCast := builder.CreateBitCast(stackObject, stackChainStartType, "") builder.CreateStore(stackObjectCast, stackChainStart) - // Replace all independent allocas with GEPs in the stack object. - for i, alloca := range allocas { - gep := builder.CreateGEP(stackObject, []llvm.Value{ - llvm.ConstInt(ctx.Int32Type(), 0, false), - llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false), - }, "") - alloca.ReplaceAllUsesWith(gep) - alloca.EraseFromParentAsInstruction() - } - // Do a store to the stack object after each new pointer that is created. pointerStores := make(map[llvm.Value]struct{}) for i, ptr := range pointers { @@ -260,7 +246,7 @@ func MakeGCStackSlots(mod llvm.Module) bool { // Extract a pointer to the appropriate section of the stack object. gep := builder.CreateGEP(stackObject, []llvm.Value{ llvm.ConstInt(ctx.Int32Type(), 0, false), - llvm.ConstInt(ctx.Int32Type(), uint64(2+len(allocas)+i), false), + llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false), }, "") // Store the pointer into the stack slot. diff --git a/transform/llvm.go b/transform/llvm.go index 90b7a7c7..32ee9560 100644 --- a/transform/llvm.go +++ b/transform/llvm.go @@ -75,26 +75,14 @@ func replaceGlobalIntWithArray(mod llvm.Module, name string, buf interface{}) ll return global } -// typeHasPointers returns whether this type is a pointer or contains pointers. -// If the type is an aggregate type, it will check whether there is a pointer -// inside. 
-func typeHasPointers(t llvm.Type) bool { - switch t.TypeKind() { - case llvm.PointerTypeKind: - return true - case llvm.StructTypeKind: - for _, subType := range t.StructElementTypes() { - if typeHasPointers(subType) { - return true - } +// stripPointerCasts strips instruction pointer casts (getelementptr and +// bitcast) and returns the original value without the casts. +func stripPointerCasts(value llvm.Value) llvm.Value { + if !value.IsAInstruction().IsNil() { + switch value.InstructionOpcode() { + case llvm.GetElementPtr, llvm.BitCast: + return stripPointerCasts(value.Operand(0)) } - return false - case llvm.ArrayTypeKind: - if typeHasPointers(t.ElementType()) { - return true - } - return false - default: - return false } + return value }