transform: improve GC stack slot pass to work around a bug

Bug 1790 ("musttail call must precede a ret with an optional bitcast")
is caused by the GC stack slot pass inserting a store instruction
between a musttail call and a return instruction. This is not allowed in
LLVM IR.

One solution would be to remove the musttail. That would probably work,
but 1) the go-llvm API doesn't support this and 2) this might have
unforeseen consequences. What I've done in this commit is to move the
store instruction to a position earlier in the basic block, just after
the last access to the GC stack slot alloca.

Thanks to @fgsch for a very small repro, which I've used as a regression
test.
Этот коммит содержится в:
Ayke van Laethem 2021-07-31 19:17:07 +02:00 коммит произвёл Ron Evans
родитель 98e70c9b19
коммит ab47cea055
4 изменённых файлов: 61 добавлений и 3 удалений

12
testdata/goroutines.go предоставленный
Просмотреть файл

@ -76,6 +76,8 @@ func main() {
testGoOnBuiltins()
testCond()
testIssue1790()
}
func acquire(m *sync.Mutex) {
@ -198,3 +200,13 @@ func testCond() {
panic("missing queued notification")
}
}
var once sync.Once
// This tests a fix for issue 1790:
// https://github.com/tinygo-org/tinygo/issues/1790
func testIssue1790() *int {
once.Do(func() {})
i := 0
return &i
}

Просмотреть файл

@ -246,6 +246,7 @@ func MakeGCStackSlots(mod llvm.Module) bool {
}
// Do a store to the stack object after each new pointer that is created.
pointerStores := make(map[llvm.Value]struct{})
for i, ptr := range pointers {
// Insert the store after the pointer value is created.
insertionPoint := llvm.NextInstruction(ptr)
@ -263,13 +264,44 @@ func MakeGCStackSlots(mod llvm.Module) bool {
}, "")
// Store the pointer into the stack slot.
builder.CreateStore(ptr, gep)
store := builder.CreateStore(ptr, gep)
pointerStores[store] = struct{}{}
}
// Make sure this stack object is popped from the linked list of stack
// objects at return.
for _, ret := range returns {
builder.SetInsertPointBefore(ret)
inst := ret
// Try to do the popping of the stack object earlier, by inserting
// it not right before the return instruction but moving the insert
// position up.
// This is necessary so that the GC stack slot pass doesn't
// interfere with tail calls (in particular, musttail calls).
for {
prevInst := llvm.PrevInstruction(inst)
if prevInst == parent {
break
}
if _, ok := pointerStores[prevInst]; ok {
// Pop the stack object after the last store instruction.
// This can probably be made more efficient: storing to the
// stack chain object and then immediately popping isn't
// useful.
break
}
if prevInst.IsNil() {
// Start of basic block. Pop the stack object here.
break
}
if !prevInst.IsAPHINode().IsNil() {
// Do not insert before a PHI node. PHI nodes must be
// grouped at the beginning of a basic block before any
// other instruction.
break
}
inst = prevInst
}
builder.SetInsertPointBefore(inst)
builder.CreateStore(parent, stackChainStart)
}
}

8
transform/testdata/gc-stackslots.ll предоставленный
Просмотреть файл

@ -20,6 +20,10 @@ define i8* @needsStackSlots() {
; so tracking it is not really necessary.
%ptr = call i8* @runtime.alloc(i32 4)
call void @runtime.trackPointer(i8* %ptr)
; Restoring the stack pointer can happen at this position, before the return.
; This avoids issues with tail calls.
call void @someArbitraryFunction()
%val = load i8, i8* @someGlobal
ret i8* %ptr
}
@ -95,3 +99,7 @@ define void @testGEPBitcast() {
call void @runtime.trackPointer(i8* %other)
ret void
}
define void @someArbitraryFunction() {
ret void
}

8
transform/testdata/gc-stackslots.out.ll предоставленный
Просмотреть файл

@ -26,6 +26,8 @@ define i8* @needsStackSlots() {
%4 = getelementptr { %runtime.stackChainObject*, i32, i8* }, { %runtime.stackChainObject*, i32, i8* }* %gc.stackobject, i32 0, i32 2
store i8* %ptr, i8** %4
store %runtime.stackChainObject* %1, %runtime.stackChainObject** @runtime.stackChainStart
call void @someArbitraryFunction()
%val = load i8, i8* @someGlobal
ret i8* %ptr
}
@ -73,8 +75,8 @@ define i8* @fibNext(i8* %x, i8* %y) {
%out.alloc = call i8* @runtime.alloc(i32 1)
%4 = getelementptr { %runtime.stackChainObject*, i32, i8* }, { %runtime.stackChainObject*, i32, i8* }* %gc.stackobject, i32 0, i32 2
store i8* %out.alloc, i8** %4
store i8 %out.val, i8* %out.alloc
store %runtime.stackChainObject* %1, %runtime.stackChainObject** @runtime.stackChainStart
store i8 %out.val, i8* %out.alloc
ret i8* %out.alloc
}
@ -135,3 +137,7 @@ define void @testGEPBitcast() {
store %runtime.stackChainObject* %1, %runtime.stackChainObject** @runtime.stackChainStart
ret void
}
define void @someArbitraryFunction() {
ret void
}