compiler: replace some math operation bodies with fast intrinsics
Instead of changing the calls, replace the function bodies themselves. This is useful for a number of reasons; see https://github.com/tinygo-org/tinygo/pull/2920 for more information. I have removed the math intrinsics tests because they are no longer useful. Instead, I think `tinygo test math` should suffice.
Этот коммит содержится в:
родитель
4695da83b7
коммит
20a7a6fd54
7 изменённых файлов: 38 добавлений и 127 удалений
|
@ -790,6 +790,10 @@ func (c *compilerContext) createPackage(irbuilder llvm.Builder, pkg *ssa.Package
|
||||||
}
|
}
|
||||||
// Create the function definition.
|
// Create the function definition.
|
||||||
b := newBuilder(c, irbuilder, member)
|
b := newBuilder(c, irbuilder, member)
|
||||||
|
if _, ok := mathToLLVMMapping[member.RelString(nil)]; ok {
|
||||||
|
b.defineMathOp()
|
||||||
|
continue
|
||||||
|
}
|
||||||
if member.Blocks == nil {
|
if member.Blocks == nil {
|
||||||
// Try to define this as an intrinsic function.
|
// Try to define this as an intrinsic function.
|
||||||
b.defineIntrinsicFunction()
|
b.defineIntrinsicFunction()
|
||||||
|
@ -1024,7 +1028,7 @@ func (c *compilerContext) getEmbedFileString(file *loader.EmbedFile) llvm.Value
|
||||||
// parameters, create basic blocks, and set up debug information.
|
// parameters, create basic blocks, and set up debug information.
|
||||||
// This is separated out from createFunction() so that it is also usable to
|
// This is separated out from createFunction() so that it is also usable to
|
||||||
// define compiler intrinsics like the atomic operations in sync/atomic.
|
// define compiler intrinsics like the atomic operations in sync/atomic.
|
||||||
func (b *builder) createFunctionStart() {
|
func (b *builder) createFunctionStart(intrinsic bool) {
|
||||||
if b.DumpSSA {
|
if b.DumpSSA {
|
||||||
fmt.Printf("\nfunc %s:\n", b.fn)
|
fmt.Printf("\nfunc %s:\n", b.fn)
|
||||||
}
|
}
|
||||||
|
@ -1097,20 +1101,20 @@ func (b *builder) createFunctionStart() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pre-create all basic blocks in the function.
|
// Pre-create all basic blocks in the function.
|
||||||
for _, block := range b.fn.DomPreorder() {
|
|
||||||
llvmBlock := b.ctx.AddBasicBlock(b.llvmFn, block.Comment)
|
|
||||||
b.blockEntries[block] = llvmBlock
|
|
||||||
b.blockExits[block] = llvmBlock
|
|
||||||
}
|
|
||||||
var entryBlock llvm.BasicBlock
|
var entryBlock llvm.BasicBlock
|
||||||
if len(b.fn.Blocks) != 0 {
|
if intrinsic {
|
||||||
// Normal functions have an entry block.
|
|
||||||
entryBlock = b.blockEntries[b.fn.Blocks[0]]
|
|
||||||
} else {
|
|
||||||
// This function isn't defined in Go SSA. It is probably a compiler
|
// This function isn't defined in Go SSA. It is probably a compiler
|
||||||
// intrinsic (like an atomic operation). Create the entry block
|
// intrinsic (like an atomic operation). Create the entry block
|
||||||
// manually.
|
// manually.
|
||||||
entryBlock = b.ctx.AddBasicBlock(b.llvmFn, "entry")
|
entryBlock = b.ctx.AddBasicBlock(b.llvmFn, "entry")
|
||||||
|
} else {
|
||||||
|
for _, block := range b.fn.DomPreorder() {
|
||||||
|
llvmBlock := b.ctx.AddBasicBlock(b.llvmFn, block.Comment)
|
||||||
|
b.blockEntries[block] = llvmBlock
|
||||||
|
b.blockExits[block] = llvmBlock
|
||||||
|
}
|
||||||
|
// Normal functions have an entry block.
|
||||||
|
entryBlock = b.blockEntries[b.fn.Blocks[0]]
|
||||||
}
|
}
|
||||||
b.SetInsertPointAtEnd(entryBlock)
|
b.SetInsertPointAtEnd(entryBlock)
|
||||||
|
|
||||||
|
@ -1192,7 +1196,7 @@ func (b *builder) createFunctionStart() {
|
||||||
// function must not yet be defined, otherwise this function will create a
|
// function must not yet be defined, otherwise this function will create a
|
||||||
// diagnostic.
|
// diagnostic.
|
||||||
func (b *builder) createFunction() {
|
func (b *builder) createFunction() {
|
||||||
b.createFunctionStart()
|
b.createFunctionStart(false)
|
||||||
|
|
||||||
// Fill blocks with instructions.
|
// Fill blocks with instructions.
|
||||||
for _, block := range b.fn.DomPreorder() {
|
for _, block := range b.fn.DomPreorder() {
|
||||||
|
@ -1654,8 +1658,6 @@ func (b *builder) createFunctionCall(instr *ssa.CallCommon) (llvm.Value, error)
|
||||||
// applied) function call. If it is anonymous, it may be a closure.
|
// applied) function call. If it is anonymous, it may be a closure.
|
||||||
name := fn.RelString(nil)
|
name := fn.RelString(nil)
|
||||||
switch {
|
switch {
|
||||||
case name == "math.Ceil" || name == "math.Floor" || name == "math.Sqrt" || name == "math.Trunc":
|
|
||||||
return b.createMathOp(instr), nil
|
|
||||||
case name == "device.Asm" || name == "device/arm.Asm" || name == "device/arm64.Asm" || name == "device/avr.Asm" || name == "device/riscv.Asm":
|
case name == "device.Asm" || name == "device/arm.Asm" || name == "device/arm64.Asm" || name == "device/avr.Asm" || name == "device/riscv.Asm":
|
||||||
return b.createInlineAsm(instr.Args)
|
return b.createInlineAsm(instr.Args)
|
||||||
case name == "device.AsmFull" || name == "device/arm.AsmFull" || name == "device/arm64.AsmFull" || name == "device/avr.AsmFull" || name == "device/riscv.AsmFull":
|
case name == "device.AsmFull" || name == "device/arm.AsmFull" || name == "device/arm64.AsmFull" || name == "device/avr.AsmFull" || name == "device/riscv.AsmFull":
|
||||||
|
|
|
@ -27,12 +27,6 @@ type testCase struct {
|
||||||
func TestCompiler(t *testing.T) {
|
func TestCompiler(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
// Determine LLVM version.
|
|
||||||
llvmMajor, err := strconv.Atoi(strings.SplitN(llvm.Version, ".", 2)[0])
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal("could not parse LLVM version:", llvm.Version)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine which tests to run, depending on the Go and LLVM versions.
|
// Determine which tests to run, depending on the Go and LLVM versions.
|
||||||
tests := []testCase{
|
tests := []testCase{
|
||||||
{"basic.go", "", ""},
|
{"basic.go", "", ""},
|
||||||
|
@ -47,14 +41,8 @@ func TestCompiler(t *testing.T) {
|
||||||
{"goroutine.go", "wasm", "asyncify"},
|
{"goroutine.go", "wasm", "asyncify"},
|
||||||
{"goroutine.go", "cortex-m-qemu", "tasks"},
|
{"goroutine.go", "cortex-m-qemu", "tasks"},
|
||||||
{"channel.go", "", ""},
|
{"channel.go", "", ""},
|
||||||
{"intrinsics.go", "cortex-m-qemu", ""},
|
|
||||||
{"intrinsics.go", "wasm", ""},
|
|
||||||
{"gc.go", "", ""},
|
{"gc.go", "", ""},
|
||||||
}
|
}
|
||||||
if llvmMajor >= 12 {
|
|
||||||
tests = append(tests, testCase{"intrinsics.go", "cortex-m-qemu", ""})
|
|
||||||
tests = append(tests, testCase{"intrinsics.go", "wasm", ""})
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
name := tc.file
|
name := tc.file
|
||||||
|
|
|
@ -7,7 +7,6 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/tools/go/ssa"
|
|
||||||
"tinygo.org/x/go-llvm"
|
"tinygo.org/x/go-llvm"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -29,7 +28,7 @@ func (b *builder) defineIntrinsicFunction() {
|
||||||
case strings.HasPrefix(name, "runtime/volatile.Store"):
|
case strings.HasPrefix(name, "runtime/volatile.Store"):
|
||||||
b.createVolatileStore()
|
b.createVolatileStore()
|
||||||
case strings.HasPrefix(name, "sync/atomic.") && token.IsExported(b.fn.Name()):
|
case strings.HasPrefix(name, "sync/atomic.") && token.IsExported(b.fn.Name()):
|
||||||
b.createFunctionStart()
|
b.createFunctionStart(true)
|
||||||
returnValue := b.createAtomicOp(b.fn.Name())
|
returnValue := b.createAtomicOp(b.fn.Name())
|
||||||
if !returnValue.IsNil() {
|
if !returnValue.IsNil() {
|
||||||
b.CreateRet(returnValue)
|
b.CreateRet(returnValue)
|
||||||
|
@ -44,7 +43,7 @@ func (b *builder) defineIntrinsicFunction() {
|
||||||
// specially by optimization passes possibly resulting in better generated code,
|
// specially by optimization passes possibly resulting in better generated code,
|
||||||
// and will otherwise be lowered to regular libc memcpy/memmove calls.
|
// and will otherwise be lowered to regular libc memcpy/memmove calls.
|
||||||
func (b *builder) createMemoryCopyImpl() {
|
func (b *builder) createMemoryCopyImpl() {
|
||||||
b.createFunctionStart()
|
b.createFunctionStart(true)
|
||||||
fnName := "llvm." + b.fn.Name() + ".p0i8.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth())
|
fnName := "llvm." + b.fn.Name() + ".p0i8.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth())
|
||||||
llvmFn := b.mod.NamedFunction(fnName)
|
llvmFn := b.mod.NamedFunction(fnName)
|
||||||
if llvmFn.IsNil() {
|
if llvmFn.IsNil() {
|
||||||
|
@ -64,7 +63,7 @@ func (b *builder) createMemoryCopyImpl() {
|
||||||
// memory, declaring the function if needed. These calls will be lowered to
|
// memory, declaring the function if needed. These calls will be lowered to
|
||||||
// regular libc memset calls if they aren't optimized out in a different way.
|
// regular libc memset calls if they aren't optimized out in a different way.
|
||||||
func (b *builder) createMemoryZeroImpl() {
|
func (b *builder) createMemoryZeroImpl() {
|
||||||
b.createFunctionStart()
|
b.createFunctionStart(true)
|
||||||
fnName := "llvm.memset.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth())
|
fnName := "llvm.memset.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth())
|
||||||
llvmFn := b.mod.NamedFunction(fnName)
|
llvmFn := b.mod.NamedFunction(fnName)
|
||||||
if llvmFn.IsNil() {
|
if llvmFn.IsNil() {
|
||||||
|
@ -88,10 +87,19 @@ var mathToLLVMMapping = map[string]string{
|
||||||
"math.Trunc": "llvm.trunc.f64",
|
"math.Trunc": "llvm.trunc.f64",
|
||||||
}
|
}
|
||||||
|
|
||||||
// createMathOp lowers the given call as a LLVM math intrinsic. It returns the
|
// defineMathOp defines a math function body as a call to an LLVM intrinsic,
|
||||||
// resulting value.
|
// instead of the regular Go implementation. This allows LLVM to reason about
|
||||||
func (b *builder) createMathOp(call *ssa.CallCommon) llvm.Value {
|
// the math operation and (depending on the architecture) allows it to lower the
|
||||||
llvmName := mathToLLVMMapping[call.StaticCallee().RelString(nil)]
|
// operation to very fast floating point instructions. If this is not possible,
|
||||||
|
// LLVM will emit a call to a libm function that implements the same operation.
|
||||||
|
//
|
||||||
|
// One example of an optimization that LLVM can do is to convert
|
||||||
|
// float32(math.Sqrt(float64(v))) to a 32-bit floating point operation, which is
|
||||||
|
// beneficial on architectures where 64-bit floating point operations are (much)
|
||||||
|
// more expensive than 32-bit ones.
|
||||||
|
func (b *builder) defineMathOp() {
|
||||||
|
b.createFunctionStart(true)
|
||||||
|
llvmName := mathToLLVMMapping[b.fn.RelString(nil)]
|
||||||
if llvmName == "" {
|
if llvmName == "" {
|
||||||
panic("unreachable: unknown math operation") // sanity check
|
panic("unreachable: unknown math operation") // sanity check
|
||||||
}
|
}
|
||||||
|
@ -104,9 +112,10 @@ func (b *builder) createMathOp(call *ssa.CallCommon) llvm.Value {
|
||||||
llvmFn = llvm.AddFunction(b.mod, llvmName, llvmType)
|
llvmFn = llvm.AddFunction(b.mod, llvmName, llvmType)
|
||||||
}
|
}
|
||||||
// Create a call to the intrinsic.
|
// Create a call to the intrinsic.
|
||||||
args := make([]llvm.Value, len(call.Args))
|
args := make([]llvm.Value, len(b.fn.Params))
|
||||||
for i, arg := range call.Args {
|
for i, param := range b.fn.Params {
|
||||||
args[i] = b.getValue(arg)
|
args[i] = b.getValue(param)
|
||||||
}
|
}
|
||||||
return b.CreateCall(llvmFn, args, "")
|
result := b.CreateCall(llvmFn, args, "")
|
||||||
|
b.CreateRet(result)
|
||||||
}
|
}
|
||||||
|
|
36
compiler/testdata/intrinsics-cortex-m-qemu.ll
предоставленный
36
compiler/testdata/intrinsics-cortex-m-qemu.ll
предоставленный
|
@ -1,36 +0,0 @@
|
||||||
; ModuleID = 'intrinsics.go'
|
|
||||||
source_filename = "intrinsics.go"
|
|
||||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
|
||||||
target triple = "thumbv7m-unknown-unknown-eabi"
|
|
||||||
|
|
||||||
declare noalias nonnull i8* @runtime.alloc(i32, i8*, i8*) #0
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define hidden void @main.init(i8* %context) unnamed_addr #1 {
|
|
||||||
entry:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define hidden double @main.mySqrt(double %x, i8* %context) unnamed_addr #1 {
|
|
||||||
entry:
|
|
||||||
%0 = call double @llvm.sqrt.f64(double %x)
|
|
||||||
ret double %0
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
|
|
||||||
declare double @llvm.sqrt.f64(double) #2
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define hidden double @main.myTrunc(double %x, i8* %context) unnamed_addr #1 {
|
|
||||||
entry:
|
|
||||||
%0 = call double @llvm.trunc.f64(double %x)
|
|
||||||
ret double %0
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
|
|
||||||
declare double @llvm.trunc.f64(double) #2
|
|
||||||
|
|
||||||
attributes #0 = { "target-features"="+armv7-m,+hwdiv,+soft-float,+strict-align,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-dsp,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-neon,-pacbti,-ras,-sb,-sha2,-vfp2,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" }
|
|
||||||
attributes #1 = { nounwind "target-features"="+armv7-m,+hwdiv,+soft-float,+strict-align,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-dsp,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-neon,-pacbti,-ras,-sb,-sha2,-vfp2,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" }
|
|
||||||
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
|
|
38
compiler/testdata/intrinsics-wasm.ll
предоставленный
38
compiler/testdata/intrinsics-wasm.ll
предоставленный
|
@ -1,38 +0,0 @@
|
||||||
; ModuleID = 'intrinsics.go'
|
|
||||||
source_filename = "intrinsics.go"
|
|
||||||
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
|
|
||||||
target triple = "wasm32-unknown-wasi"
|
|
||||||
|
|
||||||
declare noalias nonnull i8* @runtime.alloc(i32, i8*, i8*) #0
|
|
||||||
|
|
||||||
declare void @runtime.trackPointer(i8* nocapture readonly, i8*) #0
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define hidden void @main.init(i8* %context) unnamed_addr #1 {
|
|
||||||
entry:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define hidden double @main.mySqrt(double %x, i8* %context) unnamed_addr #1 {
|
|
||||||
entry:
|
|
||||||
%0 = call double @llvm.sqrt.f64(double %x)
|
|
||||||
ret double %0
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
|
|
||||||
declare double @llvm.sqrt.f64(double) #2
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define hidden double @main.myTrunc(double %x, i8* %context) unnamed_addr #1 {
|
|
||||||
entry:
|
|
||||||
%0 = call double @llvm.trunc.f64(double %x)
|
|
||||||
ret double %0
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
|
|
||||||
declare double @llvm.trunc.f64(double) #2
|
|
||||||
|
|
||||||
attributes #0 = { "target-features"="+bulk-memory,+nontrapping-fptoint,+sign-ext" }
|
|
||||||
attributes #1 = { nounwind "target-features"="+bulk-memory,+nontrapping-fptoint,+sign-ext" }
|
|
||||||
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
|
|
14
compiler/testdata/intrinsics.go
предоставленный
14
compiler/testdata/intrinsics.go
предоставленный
|
@ -1,14 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
// Test how intrinsics are lowered: either as regular calls to the math
|
|
||||||
// functions or as LLVM builtins (such as llvm.sqrt.f64).
|
|
||||||
|
|
||||||
import "math"
|
|
||||||
|
|
||||||
func mySqrt(x float64) float64 {
|
|
||||||
return math.Sqrt(x)
|
|
||||||
}
|
|
||||||
|
|
||||||
func myTrunc(x float64) float64 {
|
|
||||||
return math.Trunc(x)
|
|
||||||
}
|
|
|
@ -6,7 +6,7 @@ package compiler
|
||||||
// createVolatileLoad is the implementation of the intrinsic function
|
// createVolatileLoad is the implementation of the intrinsic function
|
||||||
// runtime/volatile.LoadT().
|
// runtime/volatile.LoadT().
|
||||||
func (b *builder) createVolatileLoad() {
|
func (b *builder) createVolatileLoad() {
|
||||||
b.createFunctionStart()
|
b.createFunctionStart(true)
|
||||||
addr := b.getValue(b.fn.Params[0])
|
addr := b.getValue(b.fn.Params[0])
|
||||||
b.createNilCheck(b.fn.Params[0], addr, "deref")
|
b.createNilCheck(b.fn.Params[0], addr, "deref")
|
||||||
val := b.CreateLoad(addr, "")
|
val := b.CreateLoad(addr, "")
|
||||||
|
@ -17,7 +17,7 @@ func (b *builder) createVolatileLoad() {
|
||||||
// createVolatileStore is the implementation of the intrinsic function
|
// createVolatileStore is the implementation of the intrinsic function
|
||||||
// runtime/volatile.StoreT().
|
// runtime/volatile.StoreT().
|
||||||
func (b *builder) createVolatileStore() {
|
func (b *builder) createVolatileStore() {
|
||||||
b.createFunctionStart()
|
b.createFunctionStart(true)
|
||||||
addr := b.getValue(b.fn.Params[0])
|
addr := b.getValue(b.fn.Params[0])
|
||||||
val := b.getValue(b.fn.Params[1])
|
val := b.getValue(b.fn.Params[1])
|
||||||
b.createNilCheck(b.fn.Params[0], addr, "deref")
|
b.createNilCheck(b.fn.Params[0], addr, "deref")
|
||||||
|
|
Загрузка…
Создание таблицы
Сослаться в новой задаче