diff --git a/compiler/compiler.go b/compiler/compiler.go index a1101b71..8443c8f2 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -790,6 +790,10 @@ func (c *compilerContext) createPackage(irbuilder llvm.Builder, pkg *ssa.Package } // Create the function definition. b := newBuilder(c, irbuilder, member) + if _, ok := mathToLLVMMapping[member.RelString(nil)]; ok { + b.defineMathOp() + continue + } if member.Blocks == nil { // Try to define this as an intrinsic function. b.defineIntrinsicFunction() @@ -1024,7 +1028,7 @@ func (c *compilerContext) getEmbedFileString(file *loader.EmbedFile) llvm.Value // parameters, create basic blocks, and set up debug information. // This is separated out from createFunction() so that it is also usable to // define compiler intrinsics like the atomic operations in sync/atomic. -func (b *builder) createFunctionStart() { +func (b *builder) createFunctionStart(intrinsic bool) { if b.DumpSSA { fmt.Printf("\nfunc %s:\n", b.fn) } @@ -1097,20 +1101,20 @@ func (b *builder) createFunctionStart() { } // Pre-create all basic blocks in the function. - for _, block := range b.fn.DomPreorder() { - llvmBlock := b.ctx.AddBasicBlock(b.llvmFn, block.Comment) - b.blockEntries[block] = llvmBlock - b.blockExits[block] = llvmBlock - } var entryBlock llvm.BasicBlock - if len(b.fn.Blocks) != 0 { - // Normal functions have an entry block. - entryBlock = b.blockEntries[b.fn.Blocks[0]] - } else { + if intrinsic { // This function isn't defined in Go SSA. It is probably a compiler // intrinsic (like an atomic operation). Create the entry block // manually. entryBlock = b.ctx.AddBasicBlock(b.llvmFn, "entry") + } else { + for _, block := range b.fn.DomPreorder() { + llvmBlock := b.ctx.AddBasicBlock(b.llvmFn, block.Comment) + b.blockEntries[block] = llvmBlock + b.blockExits[block] = llvmBlock + } + // Normal functions have an entry block. + entryBlock = b.blockEntries[b.fn.Blocks[0]] } b.SetInsertPointAtEnd(entryBlock) @@ -1192,7 +1196,7 @@ func (b *builder) createFunctionStart() { // function must not yet be defined, otherwise this function will create a // diagnostic. func (b *builder) createFunction() { - b.createFunctionStart() + b.createFunctionStart(false) // Fill blocks with instructions. for _, block := range b.fn.DomPreorder() { @@ -1654,8 +1658,6 @@ func (b *builder) createFunctionCall(instr *ssa.CallCommon) (llvm.Value, error) // applied) function call. If it is anonymous, it may be a closure. name := fn.RelString(nil) switch { - case name == "math.Ceil" || name == "math.Floor" || name == "math.Sqrt" || name == "math.Trunc": - return b.createMathOp(instr), nil case name == "device.Asm" || name == "device/arm.Asm" || name == "device/arm64.Asm" || name == "device/avr.Asm" || name == "device/riscv.Asm": return b.createInlineAsm(instr.Args) case name == "device.AsmFull" || name == "device/arm.AsmFull" || name == "device/arm64.AsmFull" || name == "device/avr.AsmFull" || name == "device/riscv.AsmFull": diff --git a/compiler/compiler_test.go b/compiler/compiler_test.go index 3d9dd868..81deacbc 100644 --- a/compiler/compiler_test.go +++ b/compiler/compiler_test.go @@ -27,12 +27,6 @@ type testCase struct { func TestCompiler(t *testing.T) { t.Parallel() - // Determine LLVM version. - llvmMajor, err := strconv.Atoi(strings.SplitN(llvm.Version, ".", 2)[0]) - if err != nil { - t.Fatal("could not parse LLVM version:", llvm.Version) - } - // Determine which tests to run, depending on the Go and LLVM versions. tests := []testCase{ {"basic.go", "", ""}, @@ -47,14 +41,8 @@ func TestCompiler(t *testing.T) { {"goroutine.go", "wasm", "asyncify"}, {"goroutine.go", "cortex-m-qemu", "tasks"}, {"channel.go", "", ""}, - {"intrinsics.go", "cortex-m-qemu", ""}, - {"intrinsics.go", "wasm", ""}, {"gc.go", "", ""}, } - if llvmMajor >= 12 { - tests = append(tests, testCase{"intrinsics.go", "cortex-m-qemu", ""}) - tests = append(tests, testCase{"intrinsics.go", "wasm", ""}) - } for _, tc := range tests { name := tc.file diff --git a/compiler/intrinsics.go b/compiler/intrinsics.go index 71b3e8a2..76bdacff 100644 --- a/compiler/intrinsics.go +++ b/compiler/intrinsics.go @@ -7,7 +7,6 @@ import ( "strconv" "strings" - "golang.org/x/tools/go/ssa" "tinygo.org/x/go-llvm" ) @@ -29,7 +28,7 @@ func (b *builder) defineIntrinsicFunction() { case strings.HasPrefix(name, "runtime/volatile.Store"): b.createVolatileStore() case strings.HasPrefix(name, "sync/atomic.") && token.IsExported(b.fn.Name()): - b.createFunctionStart() + b.createFunctionStart(true) returnValue := b.createAtomicOp(b.fn.Name()) if !returnValue.IsNil() { b.CreateRet(returnValue) @@ -44,7 +43,7 @@ func (b *builder) defineIntrinsicFunction() { // specially by optimization passes possibly resulting in better generated code, // and will otherwise be lowered to regular libc memcpy/memmove calls. func (b *builder) createMemoryCopyImpl() { - b.createFunctionStart() + b.createFunctionStart(true) fnName := "llvm." + b.fn.Name() + ".p0i8.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth()) llvmFn := b.mod.NamedFunction(fnName) if llvmFn.IsNil() { @@ -64,7 +63,7 @@ func (b *builder) createMemoryCopyImpl() { // memory, declaring the function if needed. These calls will be lowered to // regular libc memset calls if they aren't optimized out in a different way. func (b *builder) createMemoryZeroImpl() { - b.createFunctionStart() + b.createFunctionStart(true) fnName := "llvm.memset.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth()) llvmFn := b.mod.NamedFunction(fnName) if llvmFn.IsNil() { @@ -88,10 +87,19 @@ var mathToLLVMMapping = map[string]string{ "math.Trunc": "llvm.trunc.f64", } -// createMathOp lowers the given call as a LLVM math intrinsic. It returns the -// resulting value. -func (b *builder) createMathOp(call *ssa.CallCommon) llvm.Value { - llvmName := mathToLLVMMapping[call.StaticCallee().RelString(nil)] +// defineMathOp defines a math function body as a call to a LLVM intrinsic, +// instead of the regular Go implementation. This allows LLVM to reason about +// the math operation and (depending on the architecture) allows it to lower the +// operation to very fast floating point instructions. If this is not possible, +// LLVM will emit a call to a libm function that implements the same operation. +// +// One example of an optimization that LLVM can do is to convert +// float32(math.Sqrt(float64(v))) to a 32-bit floating point operation, which is +// beneficial on architectures where 64-bit floating point operations are (much) +// more expensive than 32-bit ones. +func (b *builder) defineMathOp() { + b.createFunctionStart(true) + llvmName := mathToLLVMMapping[b.fn.RelString(nil)] if llvmName == "" { panic("unreachable: unknown math operation") // sanity check } @@ -104,9 +112,10 @@ func (b *builder) createMathOp(call *ssa.CallCommon) llvm.Value { llvmFn = llvm.AddFunction(b.mod, llvmName, llvmType) } // Create a call to the intrinsic. - args := make([]llvm.Value, len(call.Args)) - for i, arg := range call.Args { - args[i] = b.getValue(arg) + args := make([]llvm.Value, len(b.fn.Params)) + for i, param := range b.fn.Params { + args[i] = b.getValue(param) } - return b.CreateCall(llvmFn, args, "") + result := b.CreateCall(llvmFn, args, "") + b.CreateRet(result) } diff --git a/compiler/testdata/intrinsics-cortex-m-qemu.ll b/compiler/testdata/intrinsics-cortex-m-qemu.ll deleted file mode 100644 index de29b2aa..00000000 --- a/compiler/testdata/intrinsics-cortex-m-qemu.ll +++ /dev/null @@ -1,36 +0,0 @@ -; ModuleID = 'intrinsics.go' -source_filename = "intrinsics.go" -target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv7m-unknown-unknown-eabi" - -declare noalias nonnull i8* @runtime.alloc(i32, i8*, i8*) #0 - -; Function Attrs: nounwind -define hidden void @main.init(i8* %context) unnamed_addr #1 { -entry: - ret void -} - -; Function Attrs: nounwind -define hidden double @main.mySqrt(double %x, i8* %context) unnamed_addr #1 { -entry: - %0 = call double @llvm.sqrt.f64(double %x) - ret double %0 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare double @llvm.sqrt.f64(double) #2 - -; Function Attrs: nounwind -define hidden double @main.myTrunc(double %x, i8* %context) unnamed_addr #1 { -entry: - %0 = call double @llvm.trunc.f64(double %x) - ret double %0 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare double @llvm.trunc.f64(double) #2 - -attributes #0 = { "target-features"="+armv7-m,+hwdiv,+soft-float,+strict-align,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-dsp,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-neon,-pacbti,-ras,-sb,-sha2,-vfp2,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" } -attributes #1 = { nounwind "target-features"="+armv7-m,+hwdiv,+soft-float,+strict-align,+thumb-mode,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-dsp,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-hwdiv-arm,-i8mm,-lob,-mve,-mve.fp,-neon,-pacbti,-ras,-sb,-sha2,-vfp2,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" } -attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } diff --git a/compiler/testdata/intrinsics-wasm.ll b/compiler/testdata/intrinsics-wasm.ll deleted file mode 100644 index 833b6f6e..00000000 --- a/compiler/testdata/intrinsics-wasm.ll +++ /dev/null @@ -1,38 +0,0 @@ -; ModuleID = 'intrinsics.go' -source_filename = "intrinsics.go" -target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" -target triple = "wasm32-unknown-wasi" - -declare noalias nonnull i8* @runtime.alloc(i32, i8*, i8*) #0 - -declare void @runtime.trackPointer(i8* nocapture readonly, i8*) #0 - -; Function Attrs: nounwind -define hidden void @main.init(i8* %context) unnamed_addr #1 { -entry: - ret void -} - -; Function Attrs: nounwind -define hidden double @main.mySqrt(double %x, i8* %context) unnamed_addr #1 { -entry: - %0 = call double @llvm.sqrt.f64(double %x) - ret double %0 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare double @llvm.sqrt.f64(double) #2 - -; Function Attrs: nounwind -define hidden double @main.myTrunc(double %x, i8* %context) unnamed_addr #1 { -entry: - %0 = call double @llvm.trunc.f64(double %x) - ret double %0 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare double @llvm.trunc.f64(double) #2 - -attributes #0 = { "target-features"="+bulk-memory,+nontrapping-fptoint,+sign-ext" } -attributes #1 = { nounwind "target-features"="+bulk-memory,+nontrapping-fptoint,+sign-ext" } -attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } diff --git a/compiler/testdata/intrinsics.go b/compiler/testdata/intrinsics.go deleted file mode 100644 index fdca446f..00000000 --- a/compiler/testdata/intrinsics.go +++ /dev/null @@ -1,14 +0,0 @@ -package main - -// Test how intrinsics are lowered: either as regular calls to the math -// functions or as LLVM builtins (such as llvm.sqrt.f64). - -import "math" - -func mySqrt(x float64) float64 { - return math.Sqrt(x) -} - -func myTrunc(x float64) float64 { - return math.Trunc(x) -} diff --git a/compiler/volatile.go b/compiler/volatile.go index fe690d8b..143eff28 100644 --- a/compiler/volatile.go +++ b/compiler/volatile.go @@ -6,7 +6,7 @@ package compiler // createVolatileLoad is the implementation of the intrinsic function // runtime/volatile.LoadT(). func (b *builder) createVolatileLoad() { - b.createFunctionStart() + b.createFunctionStart(true) addr := b.getValue(b.fn.Params[0]) b.createNilCheck(b.fn.Params[0], addr, "deref") val := b.CreateLoad(addr, "") @@ -17,7 +17,7 @@ func (b *builder) createVolatileLoad() { // createVolatileStore is the implementation of the intrinsic function // runtime/volatile.StoreT(). func (b *builder) createVolatileStore() { - b.createFunctionStart() + b.createFunctionStart(true) addr := b.getValue(b.fn.Params[0]) val := b.getValue(b.fn.Params[1]) b.createNilCheck(b.fn.Params[0], addr, "deref")