interp: do not unroll loops
This change triggers a revert whenever a basic block runs instructions at runtime twice. As a result, a loop body with runtime-only instructions will no longer be unrolled. This should help some extreme cases where loops can be expanded into hundreds or thousands of instructions.
Этот коммит содержится в:
родитель
2d61972475
коммит
f2e576decf
4 изменённых файлов: 106 добавлений и 1 удалений
|
@ -18,6 +18,7 @@ var (
|
||||||
errUnsupportedInst = errors.New("interp: unsupported instruction")
|
errUnsupportedInst = errors.New("interp: unsupported instruction")
|
||||||
errUnsupportedRuntimeInst = errors.New("interp: unsupported instruction (to be emitted at runtime)")
|
errUnsupportedRuntimeInst = errors.New("interp: unsupported instruction (to be emitted at runtime)")
|
||||||
errMapAlreadyCreated = errors.New("interp: map already created")
|
errMapAlreadyCreated = errors.New("interp: map already created")
|
||||||
|
errLoopUnrolled = errors.New("interp: loop unrolled")
|
||||||
)
|
)
|
||||||
|
|
||||||
// This is one of the errors that can be returned from toLLVMValue when the
|
// This is one of the errors that can be returned from toLLVMValue when the
|
||||||
|
@ -26,7 +27,9 @@ var (
|
||||||
var errInvalidPtrToIntSize = errors.New("interp: ptrtoint integer size does not equal pointer size")
|
var errInvalidPtrToIntSize = errors.New("interp: ptrtoint integer size does not equal pointer size")
|
||||||
|
|
||||||
func isRecoverableError(err error) bool {
|
func isRecoverableError(err error) bool {
|
||||||
return err == errIntegerAsPointer || err == errUnsupportedInst || err == errUnsupportedRuntimeInst || err == errMapAlreadyCreated
|
return err == errIntegerAsPointer || err == errUnsupportedInst ||
|
||||||
|
err == errUnsupportedRuntimeInst || err == errMapAlreadyCreated ||
|
||||||
|
err == errLoopUnrolled
|
||||||
}
|
}
|
||||||
|
|
||||||
// ErrorLine is one line in a traceback. The position may be missing.
|
// ErrorLine is one line in a traceback. The position may be missing.
|
||||||
|
|
|
@ -24,15 +24,39 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
|
||||||
locals[i] = param
|
locals[i] = param
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track what blocks have run instructions at runtime.
|
||||||
|
// This is used to prevent unrolling.
|
||||||
|
var runtimeBlocks map[int]struct{}
|
||||||
|
|
||||||
// Start with the first basic block and the first instruction.
|
// Start with the first basic block and the first instruction.
|
||||||
// Branch instructions may modify both bb and instIndex when branching.
|
// Branch instructions may modify both bb and instIndex when branching.
|
||||||
bb := fn.blocks[0]
|
bb := fn.blocks[0]
|
||||||
currentBB := 0
|
currentBB := 0
|
||||||
lastBB := -1 // last basic block is undefined, only defined after a branch
|
lastBB := -1 // last basic block is undefined, only defined after a branch
|
||||||
var operands []value
|
var operands []value
|
||||||
|
startRTInsts := len(mem.instructions)
|
||||||
for instIndex := 0; instIndex < len(bb.instructions); instIndex++ {
|
for instIndex := 0; instIndex < len(bb.instructions); instIndex++ {
|
||||||
if instIndex == 0 {
|
if instIndex == 0 {
|
||||||
// This is the start of a new basic block.
|
// This is the start of a new basic block.
|
||||||
|
if len(mem.instructions) != startRTInsts {
|
||||||
|
if _, ok := runtimeBlocks[lastBB]; ok {
|
||||||
|
// This loop has been unrolled.
|
||||||
|
// Avoid doing this, as it can result in a large amount of extra machine code.
|
||||||
|
// This currently uses the branch from the last block, as there is no available information to give a better location.
|
||||||
|
lastBBInsts := fn.blocks[lastBB].instructions
|
||||||
|
return nil, mem, r.errorAt(lastBBInsts[len(lastBBInsts)-1], errLoopUnrolled)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flag the last block as having run stuff at runtime.
|
||||||
|
if runtimeBlocks == nil {
|
||||||
|
runtimeBlocks = make(map[int]struct{})
|
||||||
|
}
|
||||||
|
runtimeBlocks[lastBB] = struct{}{}
|
||||||
|
|
||||||
|
// Reset the block-start runtime instructions counter.
|
||||||
|
startRTInsts = len(mem.instructions)
|
||||||
|
}
|
||||||
|
|
||||||
// There may be PHI nodes that need to be resolved. Resolve all PHI
|
// There may be PHI nodes that need to be resolved. Resolve all PHI
|
||||||
// nodes before continuing with regular instructions.
|
// nodes before continuing with regular instructions.
|
||||||
// PHI nodes need to be treated specially because they can have a
|
// PHI nodes need to be treated specially because they can have a
|
||||||
|
|
45
interp/testdata/revert.ll
предоставленный
45
interp/testdata/revert.ll
предоставленный
|
@ -3,6 +3,8 @@ target triple = "x86_64--linux"
|
||||||
|
|
||||||
declare void @externalCall(i64)
|
declare void @externalCall(i64)
|
||||||
|
|
||||||
|
declare i64 @ptrHash(i8* nocapture)
|
||||||
|
|
||||||
@foo.knownAtRuntime = global i64 0
|
@foo.knownAtRuntime = global i64 0
|
||||||
@bar.knownAtRuntime = global i64 0
|
@bar.knownAtRuntime = global i64 0
|
||||||
@baz.someGlobal = external global [3 x {i64, i32}]
|
@baz.someGlobal = external global [3 x {i64, i32}]
|
||||||
|
@ -10,6 +12,8 @@ declare void @externalCall(i64)
|
||||||
@x.atomicNum = global i32 0
|
@x.atomicNum = global i32 0
|
||||||
@x.volatileNum = global i32 0
|
@x.volatileNum = global i32 0
|
||||||
@y.ready = global i32 0
|
@y.ready = global i32 0
|
||||||
|
@z.bloom = global i64 0
|
||||||
|
@z.arr = global [32 x i8] zeroinitializer
|
||||||
|
|
||||||
define void @runtime.initAll() unnamed_addr {
|
define void @runtime.initAll() unnamed_addr {
|
||||||
entry:
|
entry:
|
||||||
|
@ -19,6 +23,7 @@ entry:
|
||||||
call void @main.init(i8* undef)
|
call void @main.init(i8* undef)
|
||||||
call void @x.init(i8* undef)
|
call void @x.init(i8* undef)
|
||||||
call void @y.init(i8* undef)
|
call void @y.init(i8* undef)
|
||||||
|
call void @z.init(i8* undef)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,3 +77,43 @@ loop:
|
||||||
end:
|
end:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define internal void @z.init(i8* %context) unnamed_addr {
|
||||||
|
%bloom = bitcast i64* @z.bloom to i8*
|
||||||
|
|
||||||
|
; This can be safely expanded.
|
||||||
|
call void @z.setArr(i8* %bloom, i64 1, i8* %bloom)
|
||||||
|
|
||||||
|
; This call should be reverted to prevent unrolling.
|
||||||
|
call void @z.setArr(i8* bitcast ([32 x i8]* @z.arr to i8*), i64 32, i8* %bloom)
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal void @z.setArr(i8* %arr, i64 %n, i8* %context) unnamed_addr {
|
||||||
|
entry:
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop:
|
||||||
|
%prev = phi i64 [ %n, %entry ], [ %idx, %loop ]
|
||||||
|
%idx = sub i64 %prev, 1
|
||||||
|
%elem = getelementptr i8, i8* %arr, i64 %idx
|
||||||
|
call void @z.set(i8* %elem, i8* %context)
|
||||||
|
%done = icmp eq i64 %idx, 0
|
||||||
|
br i1 %done, label %end, label %loop
|
||||||
|
|
||||||
|
end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal void @z.set(i8* %ptr, i8* %context) unnamed_addr {
|
||||||
|
; Insert the pointer into the Bloom filter.
|
||||||
|
%hash = call i64 @ptrHash(i8* %ptr)
|
||||||
|
%index = lshr i64 %hash, 58
|
||||||
|
%bit = shl i64 1, %index
|
||||||
|
%bloom = bitcast i8* %context to i64*
|
||||||
|
%old = load i64, i64* %bloom
|
||||||
|
%new = or i64 %old, %bit
|
||||||
|
store i64 %new, i64* %bloom
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
33
interp/testdata/revert.out.ll
предоставленный
33
interp/testdata/revert.out.ll
предоставленный
|
@ -8,9 +8,13 @@ target triple = "x86_64--linux"
|
||||||
@x.atomicNum = local_unnamed_addr global i32 0
|
@x.atomicNum = local_unnamed_addr global i32 0
|
||||||
@x.volatileNum = global i32 0
|
@x.volatileNum = global i32 0
|
||||||
@y.ready = local_unnamed_addr global i32 0
|
@y.ready = local_unnamed_addr global i32 0
|
||||||
|
@z.bloom = global i64 0
|
||||||
|
@z.arr = global [32 x i8] zeroinitializer
|
||||||
|
|
||||||
declare void @externalCall(i64) local_unnamed_addr
|
declare void @externalCall(i64) local_unnamed_addr
|
||||||
|
|
||||||
|
declare i64 @ptrHash(i8* nocapture) local_unnamed_addr
|
||||||
|
|
||||||
define void @runtime.initAll() unnamed_addr {
|
define void @runtime.initAll() unnamed_addr {
|
||||||
entry:
|
entry:
|
||||||
call fastcc void @baz.init(i8* undef)
|
call fastcc void @baz.init(i8* undef)
|
||||||
|
@ -24,6 +28,8 @@ entry:
|
||||||
%y = load volatile i32, i32* @x.volatileNum, align 4
|
%y = load volatile i32, i32* @x.volatileNum, align 4
|
||||||
store volatile i32 %y, i32* @x.volatileNum, align 4
|
store volatile i32 %y, i32* @x.volatileNum, align 4
|
||||||
call fastcc void @y.init(i8* undef)
|
call fastcc void @y.init(i8* undef)
|
||||||
|
call fastcc void @z.set(i8* bitcast (i64* @z.bloom to i8*), i8* bitcast (i64* @z.bloom to i8*))
|
||||||
|
call fastcc void @z.setArr(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @z.arr, i32 0, i32 0), i64 32, i8* bitcast (i64* @z.bloom to i8*))
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,3 +54,30 @@ loop: ; preds = %loop, %entry
|
||||||
end: ; preds = %loop
|
end: ; preds = %loop
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define internal fastcc void @z.setArr(i8* %arr, i64 %n, i8* %context) unnamed_addr {
|
||||||
|
entry:
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop: ; preds = %loop, %entry
|
||||||
|
%prev = phi i64 [ %n, %entry ], [ %idx, %loop ]
|
||||||
|
%idx = sub i64 %prev, 1
|
||||||
|
%elem = getelementptr i8, i8* %arr, i64 %idx
|
||||||
|
call fastcc void @z.set(i8* %elem, i8* %context)
|
||||||
|
%done = icmp eq i64 %idx, 0
|
||||||
|
br i1 %done, label %end, label %loop
|
||||||
|
|
||||||
|
end: ; preds = %loop
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define internal fastcc void @z.set(i8* %ptr, i8* %context) unnamed_addr {
|
||||||
|
%hash = call i64 @ptrHash(i8* %ptr)
|
||||||
|
%index = lshr i64 %hash, 58
|
||||||
|
%bit = shl i64 1, %index
|
||||||
|
%bloom = bitcast i8* %context to i64*
|
||||||
|
%old = load i64, i64* %bloom, align 8
|
||||||
|
%new = or i64 %old, %bit
|
||||||
|
store i64 %new, i64* %bloom, align 8
|
||||||
|
ret void
|
||||||
|
}
|
Загрузка…
Создание таблицы
Сослаться в новой задаче