From 603fff78d496445ab3d614c45c75894a9f94999e Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Mon, 7 Feb 2022 15:19:25 +0100 Subject: [PATCH] all: add support for ThinLTO ThinLTO optimizes across LLVM modules at link time. This means that optimizations (such as inlining and const-propagation) are possible between C and Go. This makes this change especially useful for CGo, but not just for CGo. By doing some optimizations at link time, the linker can discard some unused functions and this leads to a size reduction on average. It does increase code size in some cases, but that's true for most optimizations. I've excluded a number of targets for now (wasm, avr, xtensa, windows, macos). They can probably be supported with some more work, but that should be done in separate PRs. Overall, this change results in an average 3.24% size reduction over all the tinygo.org/x/drivers smoke tests. TODO: this commit runs part of the pass pipeline twice. We should set the PrepareForThinLTO flag in the PassManagerBuilder for even further reduced code size (0.7%) and improved compilation speed. --- builder/build.go | 64 ++++++++++++++++++++++------- builder/cc.go | 20 ++++++--- compileopts/config.go | 28 +++++++++++++ go.mod | 2 +- go.sum | 4 +- transform/optimizer.go | 1 + transform/stacksize.go | 26 +++++++++++- transform/testdata/stacksize.out.ll | 1 + 8 files changed, 121 insertions(+), 25 deletions(-) diff --git a/builder/build.go b/builder/build.go index 4e73bd09..3dfca4e4 100644 --- a/builder/build.go +++ b/builder/build.go @@ -105,6 +105,15 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil defer os.RemoveAll(dir) } + // Look up the build cache directory, which is used to speed up incremental + // builds. + cacheDir := goenv.Get("GOCACHE") + if cacheDir == "off" { + // Use temporary build directory instead, effectively disabling the + // build cache. + cacheDir = dir + } + // Check for a libc dependency. 
// As a side effect, this also creates the headers for the given libc, if // the libc needs them. @@ -238,12 +247,6 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil // Determine the path of the bitcode file (which is a serialized version // of a LLVM module). - cacheDir := goenv.Get("GOCACHE") - if cacheDir == "off" { - // Use temporary build directory instead, effectively disabling the - // build cache. - cacheDir = dir - } bitcodePath := filepath.Join(cacheDir, "pkg-"+hex.EncodeToString(hash[:])+".bc") packageBitcodePaths[pkg.ImportPath] = bitcodePath @@ -416,7 +419,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil // Load and link all the bitcode files. This does not yet optimize // anything, it only links the bitcode files together. ctx := llvm.NewContext() - mod = ctx.NewModule("") + mod = ctx.NewModule("main") for _, pkg := range lprogram.Sorted() { pkgMod, err := ctx.ParseBitcodeFile(packageBitcodePaths[pkg.ImportPath]) if err != nil { @@ -512,8 +515,14 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil } return ioutil.WriteFile(outpath, llvmBuf.Bytes(), 0666) case ".bc": - data := llvm.WriteBitcodeToMemoryBuffer(mod).Bytes() - return ioutil.WriteFile(outpath, data, 0666) + var buf llvm.MemoryBuffer + if config.UseThinLTO() { + buf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod) + } else { + buf = llvm.WriteBitcodeToMemoryBuffer(mod) + } + defer buf.Dispose() + return ioutil.WriteFile(outpath, buf.Bytes(), 0666) case ".ll": data := []byte(mod.String()) return ioutil.WriteFile(outpath, data, 0666) @@ -533,10 +542,17 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil dependencies: []*compileJob{programJob}, result: objfile, run: func(*compileJob) error { - llvmBuf, err := machine.EmitToMemoryBuffer(mod, llvm.ObjectFile) - if err != nil { - return err + var llvmBuf llvm.MemoryBuffer + if config.UseThinLTO() { + llvmBuf = 
llvm.WriteThinLTOBitcodeToMemoryBuffer(mod) + } else { + var err error + llvmBuf, err = machine.EmitToMemoryBuffer(mod, llvm.ObjectFile) + if err != nil { + return err + } } + defer llvmBuf.Dispose() return ioutil.WriteFile(objfile, llvmBuf.Bytes(), 0666) }, } @@ -569,7 +585,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil job := &compileJob{ description: "compile extra file " + path, run: func(job *compileJob) error { - result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.Options.PrintCommands) + result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.UseThinLTO(), config.Options.PrintCommands) job.result = result return err }, @@ -587,7 +603,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil job := &compileJob{ description: "compile CGo file " + abspath, run: func(job *compileJob) error { - result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.Options.PrintCommands) + result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.UseThinLTO(), config.Options.PrintCommands) job.result = result return err }, @@ -656,6 +672,24 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil if config.Options.PrintCommands != nil { config.Options.PrintCommands(config.Target.Linker, ldflags...) } + if config.UseThinLTO() { + ldflags = append(ldflags, + "--thinlto-cache-dir="+filepath.Join(cacheDir, "thinlto"), + "-plugin-opt=mcpu="+config.CPU(), + "-plugin-opt=O"+strconv.Itoa(optLevel), + "-plugin-opt=thinlto") + if config.CodeModel() != "default" { + ldflags = append(ldflags, + "-mllvm", "-code-model="+config.CodeModel()) + } + if sizeLevel >= 2 { + // Workaround with roughly the same effect as + // https://reviews.llvm.org/D119342. + // Can hopefully be removed in LLVM 15. + ldflags = append(ldflags, + "-mllvm", "--rotation-max-header-size=0") + } + } err = link(config.Target.Linker, ldflags...) 
if err != nil { return &commandError{"failed to link", executable, err} @@ -846,7 +880,7 @@ func optimizeProgram(mod llvm.Module, config *compileopts.Config) error { } } - if config.GOOS() != "darwin" { + if config.GOOS() != "darwin" && !config.UseThinLTO() { transform.ApplyFunctionSections(mod) // -ffunction-sections } diff --git a/builder/cc.go b/builder/cc.go index 350982d0..a1628f62 100644 --- a/builder/cc.go +++ b/builder/cc.go @@ -56,7 +56,7 @@ import ( // depfile but without invalidating its name. For this reason, the depfile is // written on each new compilation (even when it seems unnecessary). However, it // could in rare cases lead to a stale file fetched from the cache. -func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands func(string, ...string)) (string, error) { +func compileAndCacheCFile(abspath, tmpdir string, cflags []string, thinlto bool, printCommands func(string, ...string)) (string, error) { // Hash input file. fileHash, err := hashFile(abspath) if err != nil { @@ -67,6 +67,11 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock")) defer unlock() + ext := ".o" + if thinlto { + ext = ".bc" + } + // Create cache key for the dependencies file. buf, err := json.Marshal(struct { Path string @@ -99,7 +104,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands } // Obtain hashes of all the files listed as a dependency. 
- outpath, err := makeCFileCachePath(dependencies, depfileNameHash) + outpath, err := makeCFileCachePath(dependencies, depfileNameHash, ext) if err == nil { if _, err := os.Stat(outpath); err == nil { return outpath, nil @@ -112,7 +117,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands return "", err } - objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*.o") + objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*"+ext) if err != nil { return "", err } @@ -124,6 +129,9 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands depTmpFile.Close() flags := append([]string{}, cflags...) // copy cflags flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies + if thinlto { + flags = append(flags, "-flto=thin") + } flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath) if strings.ToLower(filepath.Ext(abspath)) == ".s" { // If this is an assembly file (.s or .S, lowercase or uppercase), then @@ -181,7 +189,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands } // Move temporary object file to final location. - outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash) + outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash, ext) if err != nil { return "", err } @@ -196,7 +204,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands // Create a cache path (a path in GOCACHE) to store the output of a compiler // job. This path is based on the dep file name (which is a hash of metadata // including compiler flags) and the hash of all input files in the paths slice. -func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) { +func makeCFileCachePath(paths []string, depfileNameHash, ext string) (string, error) { // Hash all input files. 
fileHashes := make(map[string]string, len(paths)) for _, path := range paths { @@ -221,7 +229,7 @@ func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) outFileNameBuf := sha512.Sum512_224(buf) cacheKey := hex.EncodeToString(outFileNameBuf[:]) - outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".o") + outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+ext) return outpath, nil } diff --git a/compileopts/config.go b/compileopts/config.go index 392ef223..fa790af6 100644 --- a/compileopts/config.go +++ b/compileopts/config.go @@ -176,6 +176,34 @@ func (c *Config) AutomaticStackSize() bool { return false } +// UseThinLTO returns whether ThinLTO should be used for the given target. Some +// targets (such as wasm) are not yet supported. +// We should try and remove as many exceptions as possible in the future, so +// that this optimization can be applied in more places. +func (c *Config) UseThinLTO() bool { + parts := strings.Split(c.Triple(), "-") + if parts[0] == "wasm32" { + // wasm-ld doesn't seem to support ThinLTO yet. + return false + } + if parts[0] == "avr" || parts[0] == "xtensa" { + // These use external (GNU) linkers which might perhaps support ThinLTO + // through a plugin, but it's too much hassle to set up. + return false + } + if len(parts) >= 3 && strings.HasPrefix(parts[2], "macos") { + // We use an external linker here at the moment. + return false + } + if len(parts) >= 3 && parts[2] == "windows" { + // Linker error (undefined runtime.trackedGlobalsBitmap) when linking + // for Windows. Disable it for now until that's figured out and fixed. + return false + } + // Other architectures support ThinLTO. + return true +} + // RP2040BootPatch returns whether the RP2040 boot patch should be applied that // calculates and patches in the checksum for the 2nd stage bootloader. 
func (c *Config) RP2040BootPatch() bool { diff --git a/go.mod b/go.mod index b1ea3479..3e9612c9 100644 --- a/go.mod +++ b/go.mod @@ -15,5 +15,5 @@ require ( golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 golang.org/x/tools v0.1.6-0.20210813165731-45389f592fe9 gopkg.in/yaml.v2 v2.4.0 - tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 + tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 ) diff --git a/go.sum b/go.sum index d4f01d21..d5e03912 100644 --- a/go.sum +++ b/go.sum @@ -80,5 +80,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 h1:vQSFy0kNQegAfL/F6iyWQa4bF941Xc1gyJUkGy2m448= -tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0= +tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 h1:6G8AxueDdqobCEqQrmHPLaEH1AZ1p6Y7rGElDNT7N98= +tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0= diff --git a/transform/optimizer.go b/transform/optimizer.go index 0c1d8a8f..8e2ef7a8 100644 --- a/transform/optimizer.go +++ b/transform/optimizer.go @@ -151,6 +151,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config, optLevel, sizeLevel i funcPasses.FinalizeFunc() // Run module passes. + // TODO: somehow set the PrepareForThinLTO flag in the pass manager builder. modPasses := llvm.NewPassManager() defer modPasses.Dispose() builder.Populate(modPasses) diff --git a/transform/stacksize.go b/transform/stacksize.go index 8443166e..4d8a1381 100644 --- a/transform/stacksize.go +++ b/transform/stacksize.go @@ -11,8 +11,9 @@ import ( // modified after linking. 
func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string { functionMap := map[llvm.Value][]llvm.Value{} - var functions []llvm.Value + var functions []llvm.Value // ptrtoint values of functions var functionNames []string + var functionValues []llvm.Value // direct references to functions for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) { if use.FirstUse().IsNil() { // Apparently this stack size isn't used. @@ -23,6 +24,7 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string if _, ok := functionMap[ptrtoint]; !ok { functions = append(functions, ptrtoint) functionNames = append(functionNames, ptrtoint.Operand(0).Name()) + functionValues = append(functionValues, ptrtoint.Operand(0)) } functionMap[ptrtoint] = append(functionMap[ptrtoint], use) } @@ -44,6 +46,9 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string } stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes)) + // Add all relevant values to llvm.used (for LTO). + appendToUsedGlobals(mod, append([]llvm.Value{stackSizesGlobal}, functionValues...)...) + // Replace the calls with loads from the new global with stack sizes. irbuilder := mod.Context().NewBuilder() defer irbuilder.Dispose() @@ -62,3 +67,22 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string return functionNames } + +// Append the given values to the llvm.used array. The values can be any pointer +// type, they will be bitcast to i8*. +func appendToUsedGlobals(mod llvm.Module, values ...llvm.Value) { + if !mod.NamedGlobal("llvm.used").IsNil() { + // Sanity check. TODO: we don't emit such a global at the moment, but + // when we do we should append to it instead. 
+ panic("todo: append to existing llvm.used") + } + i8ptrType := llvm.PointerType(mod.Context().Int8Type(), 0) + var castValues []llvm.Value + for _, value := range values { + castValues = append(castValues, llvm.ConstBitCast(value, i8ptrType)) + } + usedInitializer := llvm.ConstArray(i8ptrType, castValues) + used := llvm.AddGlobal(mod, usedInitializer.Type(), "llvm.used") + used.SetInitializer(usedInitializer) + used.SetLinkage(llvm.AppendingLinkage) +} diff --git a/transform/testdata/stacksize.out.ll b/transform/testdata/stacksize.out.ll index 29f51e34..cea820ec 100644 --- a/transform/testdata/stacksize.out.ll +++ b/transform/testdata/stacksize.out.ll @@ -2,6 +2,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7m-none-eabi" @"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes" +@llvm.used = appending global [2 x i8*] [i8* bitcast ([1 x i32]* @"internal/task.stackSizes" to i8*), i8* bitcast (void (i8*)* @"runtime.run$1$gowrapper" to i8*)] declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*)