diff --git a/builder/build.go b/builder/build.go index 4e73bd09..3dfca4e4 100644 --- a/builder/build.go +++ b/builder/build.go @@ -105,6 +105,15 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil defer os.RemoveAll(dir) } + // Look up the build cache directory, which is used to speed up incremental + // builds. + cacheDir := goenv.Get("GOCACHE") + if cacheDir == "off" { + // Use temporary build directory instead, effectively disabling the + // build cache. + cacheDir = dir + } + // Check for a libc dependency. // As a side effect, this also creates the headers for the given libc, if // the libc needs them. @@ -238,12 +247,6 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil // Determine the path of the bitcode file (which is a serialized version // of a LLVM module). - cacheDir := goenv.Get("GOCACHE") - if cacheDir == "off" { - // Use temporary build directory instead, effectively disabling the - // build cache. - cacheDir = dir - } bitcodePath := filepath.Join(cacheDir, "pkg-"+hex.EncodeToString(hash[:])+".bc") packageBitcodePaths[pkg.ImportPath] = bitcodePath @@ -416,7 +419,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil // Load and link all the bitcode files. This does not yet optimize // anything, it only links the bitcode files together. ctx := llvm.NewContext() - mod = ctx.NewModule("") + mod = ctx.NewModule("main") for _, pkg := range lprogram.Sorted() { pkgMod, err := ctx.ParseBitcodeFile(packageBitcodePaths[pkg.ImportPath]) if err != nil { @@ -512,8 +515,14 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil } return ioutil.WriteFile(outpath, llvmBuf.Bytes(), 0666) case ".bc": - data := llvm.WriteBitcodeToMemoryBuffer(mod).Bytes() - return ioutil.WriteFile(outpath, data, 0666) + var buf llvm.MemoryBuffer + if config.UseThinLTO() { + buf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod) + } else { + buf = llvm.WriteBitcodeToMemoryBuffer(mod) + } + defer buf.Dispose() + return ioutil.WriteFile(outpath, buf.Bytes(), 0666) case ".ll": data := []byte(mod.String()) return ioutil.WriteFile(outpath, data, 0666) @@ -533,10 +542,17 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil dependencies: []*compileJob{programJob}, result: objfile, run: func(*compileJob) error { - llvmBuf, err := machine.EmitToMemoryBuffer(mod, llvm.ObjectFile) - if err != nil { - return err + var llvmBuf llvm.MemoryBuffer + if config.UseThinLTO() { + llvmBuf = llvm.WriteThinLTOBitcodeToMemoryBuffer(mod) + } else { + var err error + llvmBuf, err = machine.EmitToMemoryBuffer(mod, llvm.ObjectFile) + if err != nil { + return err + } } + defer llvmBuf.Dispose() return ioutil.WriteFile(objfile, llvmBuf.Bytes(), 0666) }, } @@ -569,7 +585,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil job := &compileJob{ description: "compile extra file " + path, run: func(job *compileJob) error { - result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.Options.PrintCommands) + result, err := compileAndCacheCFile(abspath, dir, config.CFlags(), config.UseThinLTO(), config.Options.PrintCommands) job.result = result return err }, @@ -587,7 +603,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil job := &compileJob{ description: "compile CGo file " + abspath, run: func(job *compileJob) error { - result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.Options.PrintCommands) + result, err := compileAndCacheCFile(abspath, dir, pkg.CFlags, config.UseThinLTO(), config.Options.PrintCommands) job.result = result return err }, @@ -656,6 +672,24 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil if config.Options.PrintCommands != nil { config.Options.PrintCommands(config.Target.Linker, ldflags...) } + if config.UseThinLTO() { + ldflags = append(ldflags, + "--thinlto-cache-dir="+filepath.Join(cacheDir, "thinlto"), + "-plugin-opt=mcpu="+config.CPU(), + "-plugin-opt=O"+strconv.Itoa(optLevel), + "-plugin-opt=thinlto") + if config.CodeModel() != "default" { + ldflags = append(ldflags, + "-mllvm", "-code-model="+config.CodeModel()) + } + if sizeLevel >= 2 { + // Workaround with roughly the same effect as + // https://reviews.llvm.org/D119342. + // Can hopefully be removed in LLVM 15. + ldflags = append(ldflags, + "-mllvm", "--rotation-max-header-size=0") + } + } err = link(config.Target.Linker, ldflags...) if err != nil { return &commandError{"failed to link", executable, err} @@ -846,7 +880,7 @@ func optimizeProgram(mod llvm.Module, config *compileopts.Config) error { } } - if config.GOOS() != "darwin" { + if config.GOOS() != "darwin" && !config.UseThinLTO() { transform.ApplyFunctionSections(mod) // -ffunction-sections } diff --git a/builder/cc.go b/builder/cc.go index 350982d0..a1628f62 100644 --- a/builder/cc.go +++ b/builder/cc.go @@ -56,7 +56,7 @@ import ( // depfile but without invalidating its name. For this reason, the depfile is // written on each new compilation (even when it seems unnecessary). However, it // could in rare cases lead to a stale file fetched from the cache. -func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands func(string, ...string)) (string, error) { +func compileAndCacheCFile(abspath, tmpdir string, cflags []string, thinlto bool, printCommands func(string, ...string)) (string, error) { // Hash input file. fileHash, err := hashFile(abspath) if err != nil { @@ -67,6 +67,11 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands unlock := lock(filepath.Join(goenv.Get("GOCACHE"), fileHash+".c.lock")) defer unlock() + ext := ".o" + if thinlto { + ext = ".bc" + } + // Create cache key for the dependencies file. buf, err := json.Marshal(struct { Path string @@ -99,7 +104,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands } // Obtain hashes of all the files listed as a dependency. - outpath, err := makeCFileCachePath(dependencies, depfileNameHash) + outpath, err := makeCFileCachePath(dependencies, depfileNameHash, ext) if err == nil { if _, err := os.Stat(outpath); err == nil { return outpath, nil @@ -112,7 +117,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands return "", err } - objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*.o") + objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*"+ext) if err != nil { return "", err } @@ -124,6 +129,9 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands depTmpFile.Close() flags := append([]string{}, cflags...) // copy cflags flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies + if thinlto { + flags = append(flags, "-flto=thin") + } flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath) if strings.ToLower(filepath.Ext(abspath)) == ".s" { // If this is an assembly file (.s or .S, lowercase or uppercase), then @@ -181,7 +189,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands } // Move temporary object file to final location. - outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash) + outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash, ext) if err != nil { return "", err } @@ -196,7 +204,7 @@ func compileAndCacheCFile(abspath, tmpdir string, cflags []string, printCommands // Create a cache path (a path in GOCACHE) to store the output of a compiler // job. This path is based on the dep file name (which is a hash of metadata // including compiler flags) and the hash of all input files in the paths slice. -func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) { +func makeCFileCachePath(paths []string, depfileNameHash, ext string) (string, error) { // Hash all input files. fileHashes := make(map[string]string, len(paths)) for _, path := range paths { @@ -221,7 +229,7 @@ func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) outFileNameBuf := sha512.Sum512_224(buf) cacheKey := hex.EncodeToString(outFileNameBuf[:]) - outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".o") + outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+ext) return outpath, nil } diff --git a/compileopts/config.go b/compileopts/config.go index 392ef223..fa790af6 100644 --- a/compileopts/config.go +++ b/compileopts/config.go @@ -176,6 +176,34 @@ func (c *Config) AutomaticStackSize() bool { return false } +// UseThinLTO returns whether ThinLTO should be used for the given target. Some +// targets (such as wasm) are not yet supported. +// We should try and remove as many exceptions as possible in the future, so +// that this optimization can be applied in more places. +func (c *Config) UseThinLTO() bool { + parts := strings.Split(c.Triple(), "-") + if parts[0] == "wasm32" { + // wasm-ld doesn't seem to support ThinLTO yet. + return false + } + if parts[0] == "avr" || parts[0] == "xtensa" { + // These use external (GNU) linkers which might perhaps support ThinLTO + // through a plugin, but it's too much hassle to set up. + return false + } + if len(parts) >= 2 && strings.HasPrefix(parts[2], "macos") { + // We use an external linker here at the moment. + return false + } + if len(parts) >= 2 && parts[2] == "windows" { + // Linker error (undefined runtime.trackedGlobalsBitmap) when linking + // for Windows. Disable it for now until that's figured out and fixed. + return false + } + // Other architectures support ThinLTO. + return true +} + // RP2040BootPatch returns whether the RP2040 boot patch should be applied that // calculates and patches in the checksum for the 2nd stage bootloader. func (c *Config) RP2040BootPatch() bool { diff --git a/go.mod b/go.mod index b1ea3479..3e9612c9 100644 --- a/go.mod +++ b/go.mod @@ -15,5 +15,5 @@ require ( golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 golang.org/x/tools v0.1.6-0.20210813165731-45389f592fe9 gopkg.in/yaml.v2 v2.4.0 - tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 + tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 ) diff --git a/go.sum b/go.sum index d4f01d21..d5e03912 100644 --- a/go.sum +++ b/go.sum @@ -80,5 +80,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3 h1:vQSFy0kNQegAfL/F6iyWQa4bF941Xc1gyJUkGy2m448= -tinygo.org/x/go-llvm v0.0.0-20220121152956-4fa2ab2718f3/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0= +tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1 h1:6G8AxueDdqobCEqQrmHPLaEH1AZ1p6Y7rGElDNT7N98= +tinygo.org/x/go-llvm v0.0.0-20220211075103-ee4aad45c3a1/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0= diff --git a/transform/optimizer.go b/transform/optimizer.go index 0c1d8a8f..8e2ef7a8 100644 --- a/transform/optimizer.go +++ b/transform/optimizer.go @@ -151,6 +151,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config, optLevel, sizeLevel i funcPasses.FinalizeFunc() // Run module passes. + // TODO: somehow set the PrepareForThinLTO flag in the pass manager builder. modPasses := llvm.NewPassManager() defer modPasses.Dispose() builder.Populate(modPasses) diff --git a/transform/stacksize.go b/transform/stacksize.go index 8443166e..4d8a1381 100644 --- a/transform/stacksize.go +++ b/transform/stacksize.go @@ -11,8 +11,9 @@ import ( // modified after linking. func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string { functionMap := map[llvm.Value][]llvm.Value{} - var functions []llvm.Value + var functions []llvm.Value // ptrtoint values of functions var functionNames []string + var functionValues []llvm.Value // direct references to functions for _, use := range getUses(mod.NamedFunction("internal/task.getGoroutineStackSize")) { if use.FirstUse().IsNil() { // Apparently this stack size isn't used. @@ -23,6 +24,7 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string if _, ok := functionMap[ptrtoint]; !ok { functions = append(functions, ptrtoint) functionNames = append(functionNames, ptrtoint.Operand(0).Name()) + functionValues = append(functionValues, ptrtoint.Operand(0)) } functionMap[ptrtoint] = append(functionMap[ptrtoint], use) } @@ -44,6 +46,9 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string } stackSizesGlobal.SetInitializer(llvm.ConstArray(functions[0].Type(), defaultStackSizes)) + // Add all relevant values to llvm.used (for LTO). + appendToUsedGlobals(mod, append([]llvm.Value{stackSizesGlobal}, functionValues...)...) + // Replace the calls with loads from the new global with stack sizes. irbuilder := mod.Context().NewBuilder() defer irbuilder.Dispose() @@ -62,3 +67,22 @@ func CreateStackSizeLoads(mod llvm.Module, config *compileopts.Config) []string return functionNames } + +// Append the given values to the llvm.used array. The values can be any pointer +// type, they will be bitcast to i8*. +func appendToUsedGlobals(mod llvm.Module, values ...llvm.Value) { + if !mod.NamedGlobal("llvm.used").IsNil() { + // Sanity check. TODO: we don't emit such a global at the moment, but + // when we do we should append to it instead. + panic("todo: append to existing llvm.used") + } + i8ptrType := llvm.PointerType(mod.Context().Int8Type(), 0) + var castValues []llvm.Value + for _, value := range values { + castValues = append(castValues, llvm.ConstBitCast(value, i8ptrType)) + } + usedInitializer := llvm.ConstArray(i8ptrType, castValues) + used := llvm.AddGlobal(mod, usedInitializer.Type(), "llvm.used") + used.SetInitializer(usedInitializer) + used.SetLinkage(llvm.AppendingLinkage) +} diff --git a/transform/testdata/stacksize.out.ll b/transform/testdata/stacksize.out.ll index 29f51e34..cea820ec 100644 --- a/transform/testdata/stacksize.out.ll +++ b/transform/testdata/stacksize.out.ll @@ -2,6 +2,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7m-none-eabi" @"internal/task.stackSizes" = global [1 x i32] [i32 1024], section ".tinygo_stacksizes" +@llvm.used = appending global [2 x i8*] [i8* bitcast ([1 x i32]* @"internal/task.stackSizes" to i8*), i8* bitcast (void (i8*)* @"runtime.run$1$gowrapper" to i8*)] declare i32 @"internal/task.getGoroutineStackSize"(i32, i8*, i8*)