wasm: fix GC scanning of allocas
Scanning of allocas was entirely broken on WebAssembly. The code intended to do this was never run. There were also no tests. Looking into this further, I found that it is not actually necessary to do that: the C stack can be scanned conservatively, and in fact this was already done for goroutine stacks (because they live on the heap and are always referenced). It wasn't done for the system stack, however. With these fixes, I believe the code should be both faster *and* more correct. I found this during my work to get opaque pointers supported in LLVM 15, because the code that was never reached finally got run and turned out to be quite buggy.
Этот коммит содержится в:
родитель
6b46ae261a
коммит
65d65c1313
7 изменённых файлов: 57 добавлений и 54 удалений
|
@ -58,6 +58,8 @@ var (
|
||||||
|
|
||||||
globalsStart = uintptr(unsafe.Pointer(&globalsStartSymbol))
|
globalsStart = uintptr(unsafe.Pointer(&globalsStartSymbol))
|
||||||
globalsEnd = uintptr(unsafe.Pointer(&heapStartSymbol))
|
globalsEnd = uintptr(unsafe.Pointer(&heapStartSymbol))
|
||||||
|
|
||||||
|
stackTop = uintptr(unsafe.Pointer(&globalsStartSymbol))
|
||||||
)
|
)
|
||||||
|
|
||||||
func align(ptr uintptr) uintptr {
|
func align(ptr uintptr) uintptr {
|
||||||
|
@ -67,6 +69,7 @@ func align(ptr uintptr) uintptr {
|
||||||
return (ptr + heapAlign - 1) &^ (heapAlign - 1)
|
return (ptr + heapAlign - 1) &^ (heapAlign - 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//export tinygo_getCurrentStackPointer
|
||||||
func getCurrentStackPointer() uintptr
|
func getCurrentStackPointer() uintptr
|
||||||
|
|
||||||
// growHeap tries to grow the heap size. It returns true if it succeeds, false
|
// growHeap tries to grow the heap size. It returns true if it succeeds, false
|
||||||
|
|
10
src/runtime/asm_tinygowasm.S
Обычный файл
10
src/runtime/asm_tinygowasm.S
Обычный файл
|
@ -0,0 +1,10 @@
|
||||||
|
.globaltype __stack_pointer, i32
|
||||||
|
|
||||||
|
.global tinygo_getCurrentStackPointer
|
||||||
|
.hidden tinygo_getCurrentStackPointer
|
||||||
|
.type tinygo_getCurrentStackPointer,@function
|
||||||
|
tinygo_getCurrentStackPointer: // func getCurrentStackPointer() uintptr
|
||||||
|
.functype tinygo_getCurrentStackPointer() -> (i32)
|
||||||
|
global.get __stack_pointer
|
||||||
|
return
|
||||||
|
end_function
|
|
@ -4,6 +4,7 @@
|
||||||
package runtime
|
package runtime
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"internal/task"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -17,19 +18,28 @@ type stackChainObject struct {
|
||||||
|
|
||||||
// markStack marks all root pointers found on the stack.
|
// markStack marks all root pointers found on the stack.
|
||||||
//
|
//
|
||||||
// This implementation is conservative and relies on the compiler inserting code
|
// - Goroutine stacks are heap allocated and always reachable in some way
|
||||||
// to manually push/pop stack objects that are stored in a linked list starting
|
// (for example through internal/task.currentTask) so they will always be
|
||||||
// with stackChainStart. Manually keeping track of stack values is _much_ more
|
// scanned.
|
||||||
// expensive than letting the compiler do it and it inhibits a few important
|
// - The system stack (aka startup stack) is not heap allocated, so even
|
||||||
// optimizations, but it has the big advantage of being portable to basically
|
// though it may be referenced it will not be scanned by default.
|
||||||
// any ISA, including WebAssembly.
|
//
|
||||||
|
// Therefore, we only need to scan the system stack.
|
||||||
|
// It is relatively easy to scan the system stack while we're on it: we can
|
||||||
|
// simply read __stack_pointer and __global_base and scan the area in between.
|
||||||
|
// Unfortunately, it's hard to get the system stack pointer while we're on a
|
||||||
|
// goroutine stack. But when we're on a goroutine stack, the system stack is in
|
||||||
|
// the scheduler which means there shouldn't be anything on the system stack
|
||||||
|
// anyway.
|
||||||
|
// ...I hope this assumption holds, otherwise we will need to store the system
|
||||||
|
// stack pointer in a global or something.
|
||||||
|
//
|
||||||
|
// The compiler also inserts code to store all globals in a chain via
|
||||||
|
// stackChainStart. Luckily we don't need to scan these, as these globals are
|
||||||
|
// stored on the goroutine stack and are therefore already getting scanned.
|
||||||
func markStack() {
|
func markStack() {
|
||||||
stackObject := stackChainStart
|
if task.OnSystemStack() {
|
||||||
for stackObject != nil {
|
markRoots(getCurrentStackPointer(), stackTop)
|
||||||
start := uintptr(unsafe.Pointer(stackObject)) + unsafe.Sizeof(uintptr(0))*2
|
|
||||||
end := start + stackObject.numSlots*unsafe.Alignof(uintptr(0))
|
|
||||||
markRoots(start, end)
|
|
||||||
stackObject = stackObject.parent
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,9 @@
|
||||||
"--stack-first",
|
"--stack-first",
|
||||||
"--no-demangle"
|
"--no-demangle"
|
||||||
],
|
],
|
||||||
|
"extra-files": [
|
||||||
|
"src/runtime/asm_tinygowasm.S"
|
||||||
|
],
|
||||||
"emulator": "wasmtime {}",
|
"emulator": "wasmtime {}",
|
||||||
"wasm-abi": "generic"
|
"wasm-abi": "generic"
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,9 @@
|
||||||
"--stack-first",
|
"--stack-first",
|
||||||
"--no-demangle"
|
"--no-demangle"
|
||||||
],
|
],
|
||||||
|
"extra-files": [
|
||||||
|
"src/runtime/asm_tinygowasm.S"
|
||||||
|
],
|
||||||
"emulator": "node {root}/targets/wasm_exec.js {}",
|
"emulator": "node {root}/targets/wasm_exec.js {}",
|
||||||
"wasm-abi": "js"
|
"wasm-abi": "js"
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,7 +139,7 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine what to do with each call.
|
// Determine what to do with each call.
|
||||||
var allocas, pointers []llvm.Value
|
var pointers []llvm.Value
|
||||||
for _, call := range calls {
|
for _, call := range calls {
|
||||||
ptr := call.Operand(0)
|
ptr := call.Operand(0)
|
||||||
call.EraseFromParentAsInstruction()
|
call.EraseFromParentAsInstruction()
|
||||||
|
@ -189,16 +189,15 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
||||||
// be optimized if needed.
|
// be optimized if needed.
|
||||||
}
|
}
|
||||||
|
|
||||||
if !ptr.IsAAllocaInst().IsNil() {
|
if ptr := stripPointerCasts(ptr); !ptr.IsAAllocaInst().IsNil() {
|
||||||
if typeHasPointers(ptr.Type().ElementType()) {
|
// Allocas don't need to be tracked because they are allocated
|
||||||
allocas = append(allocas, ptr)
|
// on the C stack which is scanned separately.
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
pointers = append(pointers, ptr)
|
pointers = append(pointers, ptr)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if len(allocas) == 0 && len(pointers) == 0 {
|
if len(pointers) == 0 {
|
||||||
// This function does not need to keep track of stack pointers.
|
// This function does not need to keep track of stack pointers.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -208,9 +207,6 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
||||||
stackChainStartType, // Pointer to parent frame.
|
stackChainStartType, // Pointer to parent frame.
|
||||||
uintptrType, // Number of elements in this frame.
|
uintptrType, // Number of elements in this frame.
|
||||||
}
|
}
|
||||||
for _, alloca := range allocas {
|
|
||||||
fields = append(fields, alloca.Type().ElementType())
|
|
||||||
}
|
|
||||||
for _, ptr := range pointers {
|
for _, ptr := range pointers {
|
||||||
fields = append(fields, ptr.Type())
|
fields = append(fields, ptr.Type())
|
||||||
}
|
}
|
||||||
|
@ -235,16 +231,6 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
||||||
stackObjectCast := builder.CreateBitCast(stackObject, stackChainStartType, "")
|
stackObjectCast := builder.CreateBitCast(stackObject, stackChainStartType, "")
|
||||||
builder.CreateStore(stackObjectCast, stackChainStart)
|
builder.CreateStore(stackObjectCast, stackChainStart)
|
||||||
|
|
||||||
// Replace all independent allocas with GEPs in the stack object.
|
|
||||||
for i, alloca := range allocas {
|
|
||||||
gep := builder.CreateGEP(stackObject, []llvm.Value{
|
|
||||||
llvm.ConstInt(ctx.Int32Type(), 0, false),
|
|
||||||
llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false),
|
|
||||||
}, "")
|
|
||||||
alloca.ReplaceAllUsesWith(gep)
|
|
||||||
alloca.EraseFromParentAsInstruction()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do a store to the stack object after each new pointer that is created.
|
// Do a store to the stack object after each new pointer that is created.
|
||||||
pointerStores := make(map[llvm.Value]struct{})
|
pointerStores := make(map[llvm.Value]struct{})
|
||||||
for i, ptr := range pointers {
|
for i, ptr := range pointers {
|
||||||
|
@ -260,7 +246,7 @@ func MakeGCStackSlots(mod llvm.Module) bool {
|
||||||
// Extract a pointer to the appropriate section of the stack object.
|
// Extract a pointer to the appropriate section of the stack object.
|
||||||
gep := builder.CreateGEP(stackObject, []llvm.Value{
|
gep := builder.CreateGEP(stackObject, []llvm.Value{
|
||||||
llvm.ConstInt(ctx.Int32Type(), 0, false),
|
llvm.ConstInt(ctx.Int32Type(), 0, false),
|
||||||
llvm.ConstInt(ctx.Int32Type(), uint64(2+len(allocas)+i), false),
|
llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false),
|
||||||
}, "")
|
}, "")
|
||||||
|
|
||||||
// Store the pointer into the stack slot.
|
// Store the pointer into the stack slot.
|
||||||
|
|
|
@ -75,26 +75,14 @@ func replaceGlobalIntWithArray(mod llvm.Module, name string, buf interface{}) ll
|
||||||
return global
|
return global
|
||||||
}
|
}
|
||||||
|
|
||||||
// typeHasPointers returns whether this type is a pointer or contains pointers.
|
// stripPointerCasts strips instruction pointer casts (getelementptr and
|
||||||
// If the type is an aggregate type, it will check whether there is a pointer
|
// bitcast) and returns the original value without the casts.
|
||||||
// inside.
|
func stripPointerCasts(value llvm.Value) llvm.Value {
|
||||||
func typeHasPointers(t llvm.Type) bool {
|
if !value.IsAInstruction().IsNil() {
|
||||||
switch t.TypeKind() {
|
switch value.InstructionOpcode() {
|
||||||
case llvm.PointerTypeKind:
|
case llvm.GetElementPtr, llvm.BitCast:
|
||||||
return true
|
return stripPointerCasts(value.Operand(0))
|
||||||
case llvm.StructTypeKind:
|
|
||||||
for _, subType := range t.StructElementTypes() {
|
|
||||||
if typeHasPointers(subType) {
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false
|
return value
|
||||||
case llvm.ArrayTypeKind:
|
|
||||||
if typeHasPointers(t.ElementType()) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
default:
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Создание таблицы
Сослаться в новой задаче