compiler,runtime: translate memzero calls to LLVM memset intrinsics
This gives the optimizer a bit more information about what the calls do, which should result in slightly better generated code. Code size sometimes goes up and sometimes goes down. I attribute the increases to the inliner, which inlines more functions: compiling the smoke tests in the drivers repository with -opt=1 results in a slight code size reduction in all cases.
Этот коммит содержится в:
родитель
eaa54bc7e3
коммит
91d1a23b14
3 изменённых файлов: 25 добавлений и 5 удалений
|
@ -1343,6 +1343,8 @@ func (b *builder) createFunctionCall(instr *ssa.CallCommon) (llvm.Value, error)
|
||||||
switch {
|
switch {
|
||||||
case name == "runtime.memcpy" || name == "runtime.memmove" || name == "reflect.memcpy":
|
case name == "runtime.memcpy" || name == "runtime.memmove" || name == "reflect.memcpy":
|
||||||
return b.createMemoryCopyCall(fn, instr.Args)
|
return b.createMemoryCopyCall(fn, instr.Args)
|
||||||
|
case name == "runtime.memzero":
|
||||||
|
return b.createMemoryZeroCall(instr.Args)
|
||||||
case name == "device/arm.ReadRegister" || name == "device/riscv.ReadRegister":
|
case name == "device/arm.ReadRegister" || name == "device/riscv.ReadRegister":
|
||||||
return b.createReadRegister(name, instr.Args)
|
return b.createReadRegister(name, instr.Args)
|
||||||
case name == "device/arm.Asm" || name == "device/avr.Asm" || name == "device/riscv.Asm":
|
case name == "device/arm.Asm" || name == "device/avr.Asm" || name == "device/riscv.Asm":
|
||||||
|
|
|
@ -28,3 +28,23 @@ func (b *builder) createMemoryCopyCall(fn *ssa.Function, args []ssa.Value) (llvm
|
||||||
b.CreateCall(llvmFn, params, "")
|
b.CreateCall(llvmFn, params, "")
|
||||||
return llvm.Value{}, nil
|
return llvm.Value{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// createMemoryZeroCall creates calls to llvm.memset.* to zero a block of
|
||||||
|
// memory, declaring the function if needed. These calls will be lowered to
|
||||||
|
// regular libc memset calls if they aren't optimized out in a different way.
|
||||||
|
func (b *builder) createMemoryZeroCall(args []ssa.Value) (llvm.Value, error) {
|
||||||
|
fnName := "llvm.memset.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth())
|
||||||
|
llvmFn := b.mod.NamedFunction(fnName)
|
||||||
|
if llvmFn.IsNil() {
|
||||||
|
fnType := llvm.FunctionType(b.ctx.VoidType(), []llvm.Type{b.i8ptrType, b.ctx.Int8Type(), b.uintptrType, b.ctx.Int1Type()}, false)
|
||||||
|
llvmFn = llvm.AddFunction(b.mod, fnName, fnType)
|
||||||
|
}
|
||||||
|
params := []llvm.Value{
|
||||||
|
b.getValue(args[0]),
|
||||||
|
llvm.ConstInt(b.ctx.Int8Type(), 0, false),
|
||||||
|
b.getValue(args[1]),
|
||||||
|
llvm.ConstInt(b.ctx.Int1Type(), 0, false),
|
||||||
|
}
|
||||||
|
b.CreateCall(llvmFn, params, "")
|
||||||
|
return llvm.Value{}, nil
|
||||||
|
}
|
||||||
|
|
|
@ -41,11 +41,9 @@ func memcpy(dst, src unsafe.Pointer, size uintptr)
|
||||||
func memmove(dst, src unsafe.Pointer, size uintptr)
|
func memmove(dst, src unsafe.Pointer, size uintptr)
|
||||||
|
|
||||||
// Set the given number of bytes to zero.
|
// Set the given number of bytes to zero.
|
||||||
func memzero(ptr unsafe.Pointer, size uintptr) {
|
// Calls to this function are converted to LLVM intrinsic calls such as
|
||||||
for i := uintptr(0); i < size; i++ {
|
// llvm.memset.p0i8.i32(ptr, 0, size, false).
|
||||||
*(*byte)(unsafe.Pointer(uintptr(ptr) + i)) = 0
|
func memzero(ptr unsafe.Pointer, size uintptr)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compare two same-size buffers for equality.
|
// Compare two same-size buffers for equality.
|
||||||
func memequal(x, y unsafe.Pointer, n uintptr) bool {
|
func memequal(x, y unsafe.Pointer, n uintptr) bool {
|
||||||
|
|
Загрузка…
Создание таблицы
Сослаться в новой задаче